DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH] net/pcap: reduce time for stopping device
@ 2022-08-25  7:20 Yiding Zhou
  2022-08-25 10:09 ` Ferruh Yigit
  2022-09-06  8:05 ` [PATCH v2] net/pcap: fix timeout of " Yiding Zhou
  0 siblings, 2 replies; 20+ messages in thread
From: Yiding Zhou @ 2022-08-25  7:20 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, stable, Yiding Zhou

The pcap file is synchronized to the disk when the device is stopped.
This takes a long time if the file is large, which can cause the
'detach sync request' to time out when the device is closed in a
multi-process scenario.

This commit fixes the issue by performing the synchronization in the Tx path.

Fixes: 4c173302c307 ("pcap: add new driver")
Cc: stable@dpdk.org

Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
---
 drivers/net/pcap/pcap_ethdev.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index ec29fd6bc5..52eafa5674 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -3,7 +3,7 @@
  * Copyright(c) 2014 6WIND S.A.
  * All rights reserved.
  */
-
+#include <unistd.h>
 #include <time.h>
 
 #include <pcap.h>
@@ -38,6 +38,8 @@
 
 #define RTE_PMD_PCAP_MAX_QUEUES 16
 
+#define ETH_PCAP_SYNC_THRESHOLD 0x20000000
+
 static char errbuf[PCAP_ERRBUF_SIZE];
 static struct timespec start_time;
 static uint64_t start_cycles;
@@ -47,6 +49,8 @@ static uint8_t iface_idx;
 static uint64_t timestamp_rx_dynflag;
 static int timestamp_dynfield_offset = -1;
 
+RTE_DEFINE_PER_LCORE(uint64_t, _pcap_cached_bytes);
+
 struct queue_stat {
 	volatile unsigned long pkts;
 	volatile unsigned long bytes;
@@ -144,6 +148,16 @@ static struct rte_eth_link pmd_link = {
 
 RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
 
+static inline void
+pcap_dumper_data_sync(pcap_dumper_t *dumper, uint32_t bytes)
+{
+	RTE_PER_LCORE(_pcap_cached_bytes) += bytes;
+	if (unlikely(RTE_PER_LCORE(_pcap_cached_bytes) > ETH_PCAP_SYNC_THRESHOLD)) {
+		if (!fdatasync(fileno(pcap_dump_file(dumper))))
+			RTE_PER_LCORE(_pcap_cached_bytes) = 0;
+	}
+}
+
 static struct queue_missed_stat*
 queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
 {
@@ -421,7 +435,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	 * process stops and to make sure the pcap file is actually written,
 	 * we flush the pcap dumper within each burst.
 	 */
-	pcap_dump_flush(dumper);
+	pcap_dumper_data_sync(dumper, tx_bytes);
 	dumper_q->tx_stat.pkts += num_tx;
 	dumper_q->tx_stat.bytes += tx_bytes;
 	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] net/pcap: reduce time for stopping device
  2022-08-25  7:20 [PATCH] net/pcap: reduce time for stopping device Yiding Zhou
@ 2022-08-25 10:09 ` Ferruh Yigit
  2022-08-25 11:17   ` Zhou, YidingX
  2022-09-06  8:05 ` [PATCH v2] net/pcap: fix timeout of " Yiding Zhou
  1 sibling, 1 reply; 20+ messages in thread
From: Ferruh Yigit @ 2022-08-25 10:09 UTC (permalink / raw)
  To: Yiding Zhou, dev; +Cc: qi.z.zhang, stable

On 8/25/2022 8:20 AM, Yiding Zhou wrote:
> The pcap file will be synchronized to the disk when stopping the device.
> It takes a long time if the file is large that would cause the
> 'detach sync request' timeout when the device is closed under multi-process
> scenario.
> 
> This commit fixes the issue by performing synchronization in Tx path
> 
> Fixes: 4c173302c307 ("pcap: add new driver")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
> ---
>   drivers/net/pcap/pcap_ethdev.c | 18 ++++++++++++++++--
>   1 file changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
> index ec29fd6bc5..52eafa5674 100644
> --- a/drivers/net/pcap/pcap_ethdev.c
> +++ b/drivers/net/pcap/pcap_ethdev.c
> @@ -3,7 +3,7 @@
>    * Copyright(c) 2014 6WIND S.A.
>    * All rights reserved.
>    */
> -
> +#include <unistd.h>
>   #include <time.h>
>   
>   #include <pcap.h>
> @@ -38,6 +38,8 @@
>   
>   #define RTE_PMD_PCAP_MAX_QUEUES 16
>   
> +#define ETH_PCAP_SYNC_THRESHOLD 0x20000000
> +
>   static char errbuf[PCAP_ERRBUF_SIZE];
>   static struct timespec start_time;
>   static uint64_t start_cycles;
> @@ -47,6 +49,8 @@ static uint8_t iface_idx;
>   static uint64_t timestamp_rx_dynflag;
>   static int timestamp_dynfield_offset = -1;
>   
> +RTE_DEFINE_PER_LCORE(uint64_t, _pcap_cached_bytes);
> +
>   struct queue_stat {
>   	volatile unsigned long pkts;
>   	volatile unsigned long bytes;
> @@ -144,6 +148,16 @@ static struct rte_eth_link pmd_link = {
>   
>   RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
>   
> +static inline void
> +pcap_dumper_data_sync(pcap_dumper_t *dumper, uint32_t bytes)
> +{
> +	RTE_PER_LCORE(_pcap_cached_bytes) += bytes;
> +	if (unlikely(RTE_PER_LCORE(_pcap_cached_bytes) > ETH_PCAP_SYNC_THRESHOLD)) {
> +		if (!fdatasync(fileno(pcap_dump_file(dumper))))
> +			RTE_PER_LCORE(_pcap_cached_bytes) = 0;
> +	}
> +}
> +
>   static struct queue_missed_stat*
>   queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
>   {
> @@ -421,7 +435,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   	 * process stops and to make sure the pcap file is actually written,
>   	 * we flush the pcap dumper within each burst.
>   	 */
> -	pcap_dump_flush(dumper);
> +	pcap_dumper_data_sync(dumper, tx_bytes);

'pcap_dump_flush()' should be doing the same thing, to write buffer to 
file, isn't it working?

Can you check the return value of the 'pcap_dump_flush()' API, I wonder 
if it keeps failing, for some reason?

>   	dumper_q->tx_stat.pkts += num_tx;
>   	dumper_q->tx_stat.bytes += tx_bytes;
>   	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;


^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH] net/pcap: reduce time for stopping device
  2022-08-25 10:09 ` Ferruh Yigit
@ 2022-08-25 11:17   ` Zhou, YidingX
  2022-08-25 12:21     ` Ferruh Yigit
  0 siblings, 1 reply; 20+ messages in thread
From: Zhou, YidingX @ 2022-08-25 11:17 UTC (permalink / raw)
  To: Ferruh Yigit, dev; +Cc: Zhang, Qi Z, stable



> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> Sent: Thursday, August 25, 2022 6:09 PM
> To: Zhou, YidingX <yidingx.zhou@intel.com>; dev@dpdk.org
> Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; stable@dpdk.org
> Subject: Re: [PATCH] net/pcap: reduce time for stopping device
> 
> On 8/25/2022 8:20 AM, Yiding Zhou wrote:
> > The pcap file will be synchronized to the disk when stopping the device.
> > It takes a long time if the file is large that would cause the 'detach
> > sync request' timeout when the device is closed under multi-process
> > scenario.
> >
> > This commit fixes the issue by performing synchronization in Tx path
> >
> > Fixes: 4c173302c307 ("pcap: add new driver")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
> > ---
> >   drivers/net/pcap/pcap_ethdev.c | 18 ++++++++++++++++--
> >   1 file changed, 16 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/pcap/pcap_ethdev.c
> > b/drivers/net/pcap/pcap_ethdev.c index ec29fd6bc5..52eafa5674 100644
> > --- a/drivers/net/pcap/pcap_ethdev.c
> > +++ b/drivers/net/pcap/pcap_ethdev.c
> > @@ -3,7 +3,7 @@
> >    * Copyright(c) 2014 6WIND S.A.
> >    * All rights reserved.
> >    */
> > -
> > +#include <unistd.h>
> >   #include <time.h>
> >
> >   #include <pcap.h>
> > @@ -38,6 +38,8 @@
> >
> >   #define RTE_PMD_PCAP_MAX_QUEUES 16
> >
> > +#define ETH_PCAP_SYNC_THRESHOLD 0x20000000
> > +
> >   static char errbuf[PCAP_ERRBUF_SIZE];
> >   static struct timespec start_time;
> >   static uint64_t start_cycles;
> > @@ -47,6 +49,8 @@ static uint8_t iface_idx;
> >   static uint64_t timestamp_rx_dynflag;
> >   static int timestamp_dynfield_offset = -1;
> >
> > +RTE_DEFINE_PER_LCORE(uint64_t, _pcap_cached_bytes);
> > +
> >   struct queue_stat {
> >   	volatile unsigned long pkts;
> >   	volatile unsigned long bytes;
> > @@ -144,6 +148,16 @@ static struct rte_eth_link pmd_link = {
> >
> >   RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
> >
> > +static inline void
> > +pcap_dumper_data_sync(pcap_dumper_t *dumper, uint32_t bytes) {
> > +	RTE_PER_LCORE(_pcap_cached_bytes) += bytes;
> > +	if (unlikely(RTE_PER_LCORE(_pcap_cached_bytes) >
> ETH_PCAP_SYNC_THRESHOLD)) {
> > +		if (!fdatasync(fileno(pcap_dump_file(dumper))))
> > +			RTE_PER_LCORE(_pcap_cached_bytes) = 0;
> > +	}
> > +}
> > +
> >   static struct queue_missed_stat*
> >   queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
> >   {
> > @@ -421,7 +435,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf
> **bufs, uint16_t nb_pkts)
> >   	 * process stops and to make sure the pcap file is actually written,
> >   	 * we flush the pcap dumper within each burst.
> >   	 */
> > -	pcap_dump_flush(dumper);
> > +	pcap_dumper_data_sync(dumper, tx_bytes);
> 
> 'pcap_dump_flush()' should be doing the same thing, to write buffer to file,
> isn't it working?
> 
> Can you check the return value of the 'pcap_dump_flush()' API, I wonder if it
> keeps failing, for some reason?
> 

'pcap_dump_flush()' returns 0 each time without error. It calls 'fflush()', which flushes userspace buffers to kernel buffers, not to disk; 'fdatasync()' is needed to ensure the data is actually written to disk.

> >   	dumper_q->tx_stat.pkts += num_tx;
> >   	dumper_q->tx_stat.bytes += tx_bytes;
> >   	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] net/pcap: reduce time for stopping device
  2022-08-25 11:17   ` Zhou, YidingX
@ 2022-08-25 12:21     ` Ferruh Yigit
  2022-08-29 11:50       ` Zhou, YidingX
  0 siblings, 1 reply; 20+ messages in thread
From: Ferruh Yigit @ 2022-08-25 12:21 UTC (permalink / raw)
  To: Zhou, YidingX, dev; +Cc: Zhang, Qi Z, stable

On 8/25/2022 12:17 PM, Zhou, YidingX wrote:
> 
> 
>> -----Original Message-----
>> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
>> Sent: Thursday, August 25, 2022 6:09 PM
>> To: Zhou, YidingX <yidingx.zhou@intel.com>; dev@dpdk.org
>> Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; stable@dpdk.org
>> Subject: Re: [PATCH] net/pcap: reduce time for stopping device
>>
>> On 8/25/2022 8:20 AM, Yiding Zhou wrote:
>>> The pcap file will be synchronized to the disk when stopping the device.
>>> It takes a long time if the file is large that would cause the 'detach
>>> sync request' timeout when the device is closed under multi-process
>>> scenario.
>>>
>>> This commit fixes the issue by performing synchronization in Tx path
>>>
>>> Fixes: 4c173302c307 ("pcap: add new driver")
>>> Cc: stable@dpdk.org
>>>
>>> Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
>>> ---
>>>    drivers/net/pcap/pcap_ethdev.c | 18 ++++++++++++++++--
>>>    1 file changed, 16 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/net/pcap/pcap_ethdev.c
>>> b/drivers/net/pcap/pcap_ethdev.c index ec29fd6bc5..52eafa5674 100644
>>> --- a/drivers/net/pcap/pcap_ethdev.c
>>> +++ b/drivers/net/pcap/pcap_ethdev.c
>>> @@ -3,7 +3,7 @@
>>>     * Copyright(c) 2014 6WIND S.A.
>>>     * All rights reserved.
>>>     */
>>> -
>>> +#include <unistd.h>
>>>    #include <time.h>
>>>
>>>    #include <pcap.h>
>>> @@ -38,6 +38,8 @@
>>>
>>>    #define RTE_PMD_PCAP_MAX_QUEUES 16
>>>
>>> +#define ETH_PCAP_SYNC_THRESHOLD 0x20000000
>>> +

I guess this is 512MB, can you please comment this.
Is there any specific reason to select this value, or is it arbitrary?


>>>    static char errbuf[PCAP_ERRBUF_SIZE];
>>>    static struct timespec start_time;
>>>    static uint64_t start_cycles;
>>> @@ -47,6 +49,8 @@ static uint8_t iface_idx;
>>>    static uint64_t timestamp_rx_dynflag;
>>>    static int timestamp_dynfield_offset = -1;
>>>
>>> +RTE_DEFINE_PER_LCORE(uint64_t, _pcap_cached_bytes);
>>> +
>>>    struct queue_stat {
>>>    	volatile unsigned long pkts;
>>>    	volatile unsigned long bytes;
>>> @@ -144,6 +148,16 @@ static struct rte_eth_link pmd_link = {
>>>
>>>    RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
>>>
>>> +static inline void
>>> +pcap_dumper_data_sync(pcap_dumper_t *dumper, uint32_t bytes) {

'pcap_' is the namespace for the libpcap, can you select another prefix, 
like 'eth_pcap_' as many driver functions does.

>>> +	RTE_PER_LCORE(_pcap_cached_bytes) += bytes;
>>> +	if (unlikely(RTE_PER_LCORE(_pcap_cached_bytes) >
>> ETH_PCAP_SYNC_THRESHOLD)) {
>>> +		if (!fdatasync(fileno(pcap_dump_file(dumper))))
>>> +			RTE_PER_LCORE(_pcap_cached_bytes) = 0;
>>> +	}
>>> +}
>>> +

pcap supports multiple queue, and each queue creates a new pcap dumper 
and single core/thread can be used for this multiple dumpers. In that 
case I think above per lcore variable logic doesn't work.

And instead of having a global value, what do you think to add a 
variable to 'struct pcap_tx_queue' for this purpose?

>>>    static struct queue_missed_stat*
>>>    queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
>>>    {
>>> @@ -421,7 +435,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf
>> **bufs, uint16_t nb_pkts)
>>>    	 * process stops and to make sure the pcap file is actually written,
>>>    	 * we flush the pcap dumper within each burst.
>>>    	 */
>>> -	pcap_dump_flush(dumper);
>>> +	pcap_dumper_data_sync(dumper, tx_bytes);
>>
>> 'pcap_dump_flush()' should be doing the same thing, to write buffer to file,
>> isn't it working?
>>
>> Can you check the return value of the 'pcap_dump_flush()' API, I wonder if it
>> keeps failing, for some reason?
>>
> 
> 'pcap_dump_flush()' returns 0 each time without error, it calls 'fflush()' to flush userspace buffers to kernel buffers, not disk. 'fdatasync()' to ensure data is written to disk.
> 

'pcap_dump_flush()' API documentation says "flushes the output buffer to 
the ``savefile,''", but as you said it uses 'fflush()' internally, so 
there is a chance that data is not written to the disk.

In this case, won't need to keep both, first flush and later 
fsync/fdatasync?

Do you observe any performance difference after change, since now 
writing to actual disk on datapath?

>>>    	dumper_q->tx_stat.pkts += num_tx;
>>>    	dumper_q->tx_stat.bytes += tx_bytes;
>>>    	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
> 


^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH] net/pcap: reduce time for stopping device
  2022-08-25 12:21     ` Ferruh Yigit
@ 2022-08-29 11:50       ` Zhou, YidingX
  2022-08-31 16:42         ` Stephen Hemminger
  2022-09-01  7:40         ` Zhou, YidingX
  0 siblings, 2 replies; 20+ messages in thread
From: Zhou, YidingX @ 2022-08-29 11:50 UTC (permalink / raw)
  To: Ferruh Yigit, dev; +Cc: Zhang, Qi Z, stable

> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> Sent: Thursday, August 25, 2022 8:22 PM
> To: Zhou, YidingX <yidingx.zhou@intel.com>; dev@dpdk.org
> Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; stable@dpdk.org
> Subject: Re: [PATCH] net/pcap: reduce time for stopping device
> 
> On 8/25/2022 12:17 PM, Zhou, YidingX wrote:
> >
> >
> >> -----Original Message-----
> >> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> >> Sent: Thursday, August 25, 2022 6:09 PM
> >> To: Zhou, YidingX <yidingx.zhou@intel.com>; dev@dpdk.org
> >> Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; stable@dpdk.org
> >> Subject: Re: [PATCH] net/pcap: reduce time for stopping device
> >>
> >> On 8/25/2022 8:20 AM, Yiding Zhou wrote:
> >>> The pcap file will be synchronized to the disk when stopping the device.
> >>> It takes a long time if the file is large that would cause the
> >>> 'detach sync request' timeout when the device is closed under
> >>> multi-process scenario.
> >>>
> >>> This commit fixes the issue by performing synchronization in Tx path
> >>>
> >>> Fixes: 4c173302c307 ("pcap: add new driver")
> >>> Cc: stable@dpdk.org
> >>>
> >>> Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
> >>> ---
> >>>    drivers/net/pcap/pcap_ethdev.c | 18 ++++++++++++++++--
> >>>    1 file changed, 16 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/drivers/net/pcap/pcap_ethdev.c
> >>> b/drivers/net/pcap/pcap_ethdev.c index ec29fd6bc5..52eafa5674 100644
> >>> --- a/drivers/net/pcap/pcap_ethdev.c
> >>> +++ b/drivers/net/pcap/pcap_ethdev.c
> >>> @@ -3,7 +3,7 @@
> >>>     * Copyright(c) 2014 6WIND S.A.
> >>>     * All rights reserved.
> >>>     */
> >>> -
> >>> +#include <unistd.h>
> >>>    #include <time.h>
> >>>
> >>>    #include <pcap.h>
> >>> @@ -38,6 +38,8 @@
> >>>
> >>>    #define RTE_PMD_PCAP_MAX_QUEUES 16
> >>>
> >>> +#define ETH_PCAP_SYNC_THRESHOLD 0x20000000
> >>> +
> 
> I guess this is 512MB, can you please comment this.
> Is there any specific reason to select this value, or is it arbitrary?
> 
> 

512M is arbitrary, because there is no API to get the disk cache size associated with a specific file.
I will test the performance impact of different values.

> >>>    static char errbuf[PCAP_ERRBUF_SIZE];
> >>>    static struct timespec start_time;
> >>>    static uint64_t start_cycles;
> >>> @@ -47,6 +49,8 @@ static uint8_t iface_idx;
> >>>    static uint64_t timestamp_rx_dynflag;
> >>>    static int timestamp_dynfield_offset = -1;
> >>>
> >>> +RTE_DEFINE_PER_LCORE(uint64_t, _pcap_cached_bytes);
> >>> +
> >>>    struct queue_stat {
> >>>    	volatile unsigned long pkts;
> >>>    	volatile unsigned long bytes;
> >>> @@ -144,6 +148,16 @@ static struct rte_eth_link pmd_link = {
> >>>
> >>>    RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
> >>>
> >>> +static inline void
> >>> +pcap_dumper_data_sync(pcap_dumper_t *dumper, uint32_t bytes) {
> 
> 'pcap_' is the namespace for the libpcap, can you select another prefix, like
> 'eth_pcap_' as many driver functions does.
> 
> >>> +	RTE_PER_LCORE(_pcap_cached_bytes) += bytes;
> >>> +	if (unlikely(RTE_PER_LCORE(_pcap_cached_bytes) >
> >> ETH_PCAP_SYNC_THRESHOLD)) {
> >>> +		if (!fdatasync(fileno(pcap_dump_file(dumper))))
> >>> +			RTE_PER_LCORE(_pcap_cached_bytes) = 0;
> >>> +	}
> >>> +}
> >>> +
> 
> pcap supports multiple queue, and each queue creates a new pcap dumper and
> single core/thread can be used for this multiple dumpers. In that case I think
> above per lcore variable logic doesn't work.
> 
> And instead of having a global value, what do you think to add a variable to
> 'struct pcap_tx_queue' for this purpose?
> 

Thanks for the comments, I will follow this.

> >>>    static struct queue_missed_stat*
> >>>    queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
> >>>    {
> >>> @@ -421,7 +435,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf
> >> **bufs, uint16_t nb_pkts)
> >>>    	 * process stops and to make sure the pcap file is actually written,
> >>>    	 * we flush the pcap dumper within each burst.
> >>>    	 */
> >>> -	pcap_dump_flush(dumper);
> >>> +	pcap_dumper_data_sync(dumper, tx_bytes);
> >>
> >> 'pcap_dump_flush()' should be doing the same thing, to write buffer
> >> to file, isn't it working?
> >>
> >> Can you check the return value of the 'pcap_dump_flush()' API, I
> >> wonder if it keeps failing, for some reason?
> >>
> >
> > 'pcap_dump_flush()' returns 0 each time without error, it calls 'fflush()' to
> flush userspace buffers to kernel buffers, not disk. 'fdatasync()' to ensure data
> is written to disk.
> >
> 
> 'pcap_dump_flush()' API documentation says "flushes the output buffer to the
> ``savefile,''", but as you said it uses 'fflush()' internally, so there is a chance that
> data is not written to the disk.
> 
> In this case, won't need to keep both, first flush and later fsync/fdatasync?
> 

I draw a diagram to describe it more clearly

 fwrite                             fclose/fflush                                                   fclose/fdatasync
--------->| libc buffer  |----------------> |    disk cache in RAM          |---------------------> |disk|
              | 4096 Bytes |                         | size is determined by OS |                                |       |

When the libc buffer is full, the system automatically syncs it to the disk cache.
It fills up easily because it is only 4096 bytes in size, so there is no need to call 'fflush()' every time.
The truly time-consuming action is syncing the disk cache to the disk.
Because the disk cache is very large, syncing it all at once during 'fclose()' takes a long time,
so 'fdatasync()' needs to be called periodically to amortize that time.

> Do you observe any performance difference after change, since now writing to
> actual disk on datapath?
> 

I will verify any performance difference under these 2 scenarios.

> >>>    	dumper_q->tx_stat.pkts += num_tx;
> >>>    	dumper_q->tx_stat.bytes += tx_bytes;
> >>>    	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
> >


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] net/pcap: reduce time for stopping device
  2022-08-29 11:50       ` Zhou, YidingX
@ 2022-08-31 16:42         ` Stephen Hemminger
  2022-09-01  7:40         ` Zhou, YidingX
  1 sibling, 0 replies; 20+ messages in thread
From: Stephen Hemminger @ 2022-08-31 16:42 UTC (permalink / raw)
  To: Zhou, YidingX; +Cc: Ferruh Yigit, dev, Zhang, Qi Z, stable

On Mon, 29 Aug 2022 11:50:29 +0000
"Zhou, YidingX" <yidingx.zhou@intel.com> wrote:

> I draw a diagram to describe it more clearly
> 
>  fwrite                             fclose/fflush                                                   fclose/fdatasync
> --------->| libc buffer  |----------------> |    disk cache in RAM          |---------------------> |disk|  
>               | 4096 Bytes |                         | size is determined by OS |                                |       |
> 
> When the libc buffer is full, the system will automatically sync it to the disk cache.
> It is easily full as it's only 4096 B size. so there is no need to call 'fflush()' every time.
> The real time consuming action is syncing the disk cache to disk.
> Because the disk cache is very large, it will take a long time to sync all at one time during 'fclose()',
> so need to call 'fdatasync()' periodically to amortize the time.

If you want to speed up this, then get rid of stdio and use a faster
API like io_uring. What you tried can help but using a better API
would make bigger gains.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH] net/pcap: reduce time for stopping device
  2022-08-29 11:50       ` Zhou, YidingX
  2022-08-31 16:42         ` Stephen Hemminger
@ 2022-09-01  7:40         ` Zhou, YidingX
  1 sibling, 0 replies; 20+ messages in thread
From: Zhou, YidingX @ 2022-09-01  7:40 UTC (permalink / raw)
  To: Ferruh Yigit, dev; +Cc: Zhang, Qi Z, stable

Hi, Ferruh

> > >>> The pcap file will be synchronized to the disk when stopping the device.
> > >>> It takes a long time if the file is large that would cause the
> > >>> 'detach sync request' timeout when the device is closed under
> > >>> multi-process scenario.
> > >>>
> > >>> This commit fixes the issue by performing synchronization in Tx
> > >>> path
> > >>>
> > >>> Fixes: 4c173302c307 ("pcap: add new driver")
> > >>> Cc: stable@dpdk.org
> > >>>
> > >>> Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
> > >>> ---
> > >>>    drivers/net/pcap/pcap_ethdev.c | 18 ++++++++++++++++--
> > >>>    1 file changed, 16 insertions(+), 2 deletions(-)
> > >>>
> > >>> diff --git a/drivers/net/pcap/pcap_ethdev.c
> > >>> b/drivers/net/pcap/pcap_ethdev.c index ec29fd6bc5..52eafa5674
> > >>> 100644
> > >>> --- a/drivers/net/pcap/pcap_ethdev.c
> > >>> +++ b/drivers/net/pcap/pcap_ethdev.c
> > >>> @@ -3,7 +3,7 @@
> > >>>     * Copyright(c) 2014 6WIND S.A.
> > >>>     * All rights reserved.
> > >>>     */
> > >>> -
> > >>> +#include <unistd.h>
> > >>>    #include <time.h>
> > >>>
> > >>>    #include <pcap.h>
> > >>> @@ -38,6 +38,8 @@
> > >>>
> > >>>    #define RTE_PMD_PCAP_MAX_QUEUES 16
> > >>>
> > >>> +#define ETH_PCAP_SYNC_THRESHOLD 0x20000000
> > >>> +
> >
> > I guess this is 512MB, can you please comment this.
> > Is there any specific reason to select this value, or is it arbitrary?
> >
> >
> 
> 512M is arbitrary, because there is no API to get the disk cache size associated
> with a specific file.
> I will test the performance impact of different values.
> 
> > >>>    static char errbuf[PCAP_ERRBUF_SIZE];
> > >>>    static struct timespec start_time;
> > >>>    static uint64_t start_cycles;
> > >>> @@ -47,6 +49,8 @@ static uint8_t iface_idx;
> > >>>    static uint64_t timestamp_rx_dynflag;
> > >>>    static int timestamp_dynfield_offset = -1;
> > >>>
> > >>> +RTE_DEFINE_PER_LCORE(uint64_t, _pcap_cached_bytes);
> > >>> +
> > >>>    struct queue_stat {
> > >>>    	volatile unsigned long pkts;
> > >>>    	volatile unsigned long bytes;
> > >>> @@ -144,6 +148,16 @@ static struct rte_eth_link pmd_link = {
> > >>>
> > >>>    RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
> > >>>
> > >>> +static inline void
> > >>> +pcap_dumper_data_sync(pcap_dumper_t *dumper, uint32_t bytes) {
> >
> > 'pcap_' is the namespace for the libpcap, can you select another
> > prefix, like 'eth_pcap_' as many driver functions does.
> >
> > >>> +	RTE_PER_LCORE(_pcap_cached_bytes) += bytes;
> > >>> +	if (unlikely(RTE_PER_LCORE(_pcap_cached_bytes) >
> > >> ETH_PCAP_SYNC_THRESHOLD)) {
> > >>> +		if (!fdatasync(fileno(pcap_dump_file(dumper))))
> > >>> +			RTE_PER_LCORE(_pcap_cached_bytes) = 0;
> > >>> +	}
> > >>> +}
> > >>> +
> >
> > pcap supports multiple queue, and each queue creates a new pcap dumper
> > and single core/thread can be used for this multiple dumpers. In that
> > case I think above per lcore variable logic doesn't work.
> >
> > And instead of having a global value, what do you think to add a
> > variable to 'struct pcap_tx_queue' for this purpose?
> >
> 
> Thanks for the comments, I will follow this.
> 
> > >>>    static struct queue_missed_stat*
> > >>>    queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
> > >>>    {
> > >>> @@ -421,7 +435,7 @@ eth_pcap_tx_dumper(void *queue, struct
> > >>> rte_mbuf
> > >> **bufs, uint16_t nb_pkts)
> > >>>    	 * process stops and to make sure the pcap file is actually written,
> > >>>    	 * we flush the pcap dumper within each burst.
> > >>>    	 */
> > >>> -	pcap_dump_flush(dumper);
> > >>> +	pcap_dumper_data_sync(dumper, tx_bytes);
> > >>
> > >> 'pcap_dump_flush()' should be doing the same thing, to write buffer
> > >> to file, isn't it working?
> > >>
> > >> Can you check the return value of the 'pcap_dump_flush()' API, I
> > >> wonder if it keeps failing, for some reason?
> > >>
> > >
> > > 'pcap_dump_flush()' returns 0 each time without error, it calls
> > > 'fflush()' to
> > flush userspace buffers to kernel buffers, not disk. 'fdatasync()' to
> > ensure data is written to disk.
> > >
> >
> > 'pcap_dump_flush()' API documentation says "flushes the output buffer
> > to the ``savefile,''", but as you said it uses 'fflush()' internally,
> > so there is a chance that data is not written to the disk.
> >
> > In this case, won't need to keep both, first flush and later fsync/fdatasync?
> >
> 
> I draw a diagram to describe it more clearly
> 
>  fwrite                             fclose/fflush                                                   fclose/fdatasync
> --------->| libc buffer  |----------------> |    disk cache in RAM          |--------------------->
> |disk|
>               | 4096 Bytes |                         | size is determined by OS |
> |       |
> 
> When the libc buffer is full, the system will automatically sync it to the disk
> cache.
> It is easily full as it's only 4096 B size. so there is no need to call 'fflush()' every
> time.
> The real time consuming action is syncing the disk cache to disk.
> Because the disk cache is very large, it will take a long time to sync all at one
> time during 'fclose()', so need to call 'fdatasync()' periodically to amortize the
> time.
> 
> > Do you observe any performance difference after change, since now
> > writing to actual disk on datapath?
> >
> 
> I will verify any performance difference under these 2 scenarios.
> 
I have done some tests. Using 'fdatasync()' causes the packet loss rate to increase by 15%-20%,
so it is not a good solution and needs rework. I plan to use a timer to call pcap_dump_close() to fix this;
do you have any suggestions?
Thanks
> > >>>    	dumper_q->tx_stat.pkts += num_tx;
> > >>>    	dumper_q->tx_stat.bytes += tx_bytes;
> > >>>    	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
> > >


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2] net/pcap: fix timeout of stopping device
  2022-08-25  7:20 [PATCH] net/pcap: reduce time for stopping device Yiding Zhou
  2022-08-25 10:09 ` Ferruh Yigit
@ 2022-09-06  8:05 ` Yiding Zhou
  2022-09-06 14:57   ` Stephen Hemminger
  1 sibling, 1 reply; 20+ messages in thread
From: Yiding Zhou @ 2022-09-06  8:05 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, anatoly.burakov, xingguang.he, Yiding Zhou, stable

The pcap file is synchronized to the disk when the device is stopped.
This takes a long time if the file is large, which can cause the
'detach sync request' to time out when the device is closed in a
multi-process scenario.

This commit fixes the issue by using an alarm handler to release the dumper.

Fixes: 0ecfb6c04d54 ("net/pcap: move handler to process private")
Cc: stable@dpdk.org

Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>

---
v2: use alarm handler to release dumper
---
 drivers/net/pcap/pcap_ethdev.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index ec29fd6bc5..5c643a0277 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -17,6 +17,7 @@
 #include <rte_mbuf_dyn.h>
 #include <rte_bus_vdev.h>
 #include <rte_os_shim.h>
+#include <rte_alarm.h>
 
 #include "pcap_osdep.h"
 
@@ -664,6 +665,25 @@ eth_dev_start(struct rte_eth_dev *dev)
 	return 0;
 }
 
+static void eth_pcap_dumper_release(void *arg)
+{
+	pcap_dump_close((pcap_dumper_t *)arg);
+}
+
+static void
+eth_pcap_dumper_close(pcap_dumper_t *dumper)
+{
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		/*
+		 * Delay 30 seconds before releasing dumper to wait for file sync
+		 * to complete to avoid blocking alarm thread in PRIMARY process
+		 */
+		rte_eal_alarm_set(30000000, eth_pcap_dumper_release, dumper);
+	} else {
+		rte_eal_alarm_set(1, eth_pcap_dumper_release, dumper);
+	}
+}
+
 /*
  * This function gets called when the current port gets stopped.
  * Is the only place for us to close all the tx streams dumpers.
@@ -689,7 +709,7 @@ eth_dev_stop(struct rte_eth_dev *dev)
 
 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
 		if (pp->tx_dumper[i] != NULL) {
-			pcap_dump_close(pp->tx_dumper[i]);
+			eth_pcap_dumper_close(pp->tx_dumper[i]);
 			pp->tx_dumper[i] = NULL;
 		}
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] net/pcap: fix timeout of stopping device
  2022-09-06  8:05 ` [PATCH v2] net/pcap: fix timeout of " Yiding Zhou
@ 2022-09-06 14:57   ` Stephen Hemminger
  2022-09-06 16:21     ` Zhou, YidingX
  2022-09-21  7:14     ` Zhou, YidingX
  0 siblings, 2 replies; 20+ messages in thread
From: Stephen Hemminger @ 2022-09-06 14:57 UTC (permalink / raw)
  To: Yiding Zhou; +Cc: dev, qi.z.zhang, anatoly.burakov, xingguang.he, stable

On Tue,  6 Sep 2022 16:05:11 +0800
Yiding Zhou <yidingx.zhou@intel.com> wrote:

> The pcap file will be synchronized to the disk when stopping the device.
> It takes a long time if the file is large that would cause the
> 'detach sync request' timeout when the device is closed under multi-process
> scenario.
> 
> This commit fixes the issue by using alarm handler to release dumper.
> 
> Fixes: 0ecfb6c04d54 ("net/pcap: move handler to process private")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>


I think you need to redesign the handshake if this the case.
Forcing 30 second delay at the end of all uses of pcap is not acceptable.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH v2] net/pcap: fix timeout of stopping device
  2022-09-06 14:57   ` Stephen Hemminger
@ 2022-09-06 16:21     ` Zhou, YidingX
  2022-09-21  7:14     ` Zhou, YidingX
  1 sibling, 0 replies; 20+ messages in thread
From: Zhou, YidingX @ 2022-09-06 16:21 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, Zhang, Qi Z, Burakov, Anatoly, He, Xingguang, stable

On Tue,  6 Sep 2022 16:05:11 +0800
Yiding Zhou <yidingx.zhou@intel.com> wrote:

> The pcap file will be synchronized to the disk when stopping the device.
> It takes a long time if the file is large that would cause the 'detach 
> sync request' timeout when the device is closed under multi-process 
> scenario.
> 
> This commit fixes the issue by using alarm handler to release dumper.
> 
> Fixes: 0ecfb6c04d54 ("net/pcap: move handler to process private")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>


I think you need to redesign the handshake if this the case.
Forcing 30 second delay at the end of all uses of pcap is not acceptable.

Thanks for your comments.
According to my test, the time required to sync a 100G pcap file is about 20s,
so I set a delay of 30s. I also tried to use AIO, but there are some issues with it in the multi-process scenario.
I also considered io_uring, but it is only supported in Linux 5.x, so we need to consider compatibility.
Maybe a better way is to do more work to redesign the handshake.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH v2] net/pcap: fix timeout of stopping device
  2022-09-06 14:57   ` Stephen Hemminger
  2022-09-06 16:21     ` Zhou, YidingX
@ 2022-09-21  7:14     ` Zhou, YidingX
  2022-10-03 15:00       ` Ferruh Yigit
  2022-11-22  9:25       ` Zhou, YidingX
  1 sibling, 2 replies; 20+ messages in thread
From: Zhou, YidingX @ 2022-09-21  7:14 UTC (permalink / raw)
  To: Stephen Hemminger, Zhang, Qi Z
  Cc: dev, Burakov, Anatoly, He, Xingguang, stable



> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Tuesday, September 6, 2022 10:58 PM
> To: Zhou, YidingX <yidingx.zhou@intel.com>
> Cc: dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>; Burakov, Anatoly
> <anatoly.burakov@intel.com>; He, Xingguang <xingguang.he@intel.com>;
> stable@dpdk.org
> Subject: Re: [PATCH v2] net/pcap: fix timeout of stopping device
> 
> On Tue,  6 Sep 2022 16:05:11 +0800
> Yiding Zhou <yidingx.zhou@intel.com> wrote:
> 
> > The pcap file will be synchronized to the disk when stopping the device.
> > It takes a long time if the file is large that would cause the 'detach
> > sync request' timeout when the device is closed under multi-process
> > scenario.
> >
> > This commit fixes the issue by using alarm handler to release dumper.
> >
> > Fixes: 0ecfb6c04d54 ("net/pcap: move handler to process private")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
> 
> 
> I think you need to redesign the handshake if this the case.
> Forcing 30 second delay at the end of all uses of pcap is not acceptable.

@Zhang, Qi Z Do we need to redesign the handshake to fix this?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] net/pcap: fix timeout of stopping device
  2022-09-21  7:14     ` Zhou, YidingX
@ 2022-10-03 15:00       ` Ferruh Yigit
  2022-11-22  9:25       ` Zhou, YidingX
  1 sibling, 0 replies; 20+ messages in thread
From: Ferruh Yigit @ 2022-10-03 15:00 UTC (permalink / raw)
  To: Zhou, YidingX, Stephen Hemminger, Zhang, Qi Z
  Cc: dev, Burakov, Anatoly, He, Xingguang, stable

On 9/21/2022 8:14 AM, Zhou, YidingX wrote:
> 
> 
>> -----Original Message-----
>> From: Stephen Hemminger <stephen@networkplumber.org>
>> Sent: Tuesday, September 6, 2022 10:58 PM
>> To: Zhou, YidingX <yidingx.zhou@intel.com>
>> Cc: dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>; Burakov, Anatoly
>> <anatoly.burakov@intel.com>; He, Xingguang <xingguang.he@intel.com>;
>> stable@dpdk.org
>> Subject: Re: [PATCH v2] net/pcap: fix timeout of stopping device
>>
>> On Tue,  6 Sep 2022 16:05:11 +0800
>> Yiding Zhou <yidingx.zhou@intel.com> wrote:
>>
>>> The pcap file will be synchronized to the disk when stopping the device.
>>> It takes a long time if the file is large that would cause the 'detach
>>> sync request' timeout when the device is closed under multi-process
>>> scenario.
>>>
>>> This commit fixes the issue by using alarm handler to release dumper.
>>>
>>> Fixes: 0ecfb6c04d54 ("net/pcap: move handler to process private")
>>> Cc: stable@dpdk.org
>>>
>>> Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
>>
>>
>> I think you need to redesign the handshake if this the case.
>> Forcing 30 second delay at the end of all uses of pcap is not acceptable.
> 
> @Zhang, Qi Z Do we need to redesign the handshake to fix this?

Hi Yiding,

Your approach works, but Stephen's comment is to not add this delay by 
default, because this delay is not always required.

Can you please provide more details on the multi-process communication and 
call trace, to help us think about a solution to address this issue in a 
more generic way (not just for pcap but for any case where device close 
takes more than the multi-process timeout)?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH v2] net/pcap: fix timeout of stopping device
  2022-09-21  7:14     ` Zhou, YidingX
  2022-10-03 15:00       ` Ferruh Yigit
@ 2022-11-22  9:25       ` Zhou, YidingX
  2022-11-22 17:28         ` Stephen Hemminger
  2022-11-29 14:11         ` Ferruh Yigit
  1 sibling, 2 replies; 20+ messages in thread
From: Zhou, YidingX @ 2022-11-22  9:25 UTC (permalink / raw)
  To: ferruh.yigit
  Cc: dev, Burakov, Anatoly, He, Xingguang, stable, Stephen Hemminger



> -----Original Message-----
> From: Zhou, YidingX <yidingx.zhou@intel.com>
> Sent: Wednesday, September 21, 2022 3:15 PM
> To: Stephen Hemminger <stephen@networkplumber.org>; Zhang, Qi Z
> <qi.z.zhang@intel.com>
> Cc: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>; He,
> Xingguang <xingguang.he@intel.com>; stable@dpdk.org
> Subject: RE: [PATCH v2] net/pcap: fix timeout of stopping device
> 
> 
> 
> > -----Original Message-----
> > From: Stephen Hemminger <mailto:stephen@networkplumber.org>
> > Sent: Tuesday, September 6, 2022 10:58 PM
> > To: Zhou, YidingX <mailto:yidingx.zhou@intel.com>
> > Cc: mailto:dev@dpdk.org; Zhang, Qi Z <mailto:qi.z.zhang@intel.com>; Burakov, Anatoly
> > <mailto:anatoly.burakov@intel.com>; He, Xingguang <mailto:xingguang.he@intel.com>;
> > mailto:stable@dpdk.org
> > Subject: Re: [PATCH v2] net/pcap: fix timeout of stopping device
> >
> > On Tue,  6 Sep 2022 16:05:11 +0800
> > Yiding Zhou <mailto:yidingx.zhou@intel.com> wrote:
> >
> > > The pcap file will be synchronized to the disk when stopping the device.
> > > It takes a long time if the file is large that would cause the
> > > 'detach sync request' timeout when the device is closed under
> > > multi-process scenario.
> > >
> > > This commit fixes the issue by using alarm handler to release dumper.
> > >
> > > Fixes: 0ecfb6c04d54 ("net/pcap: move handler to process private")
> > > Cc: mailto:stable@dpdk.org
> > >
> > > Signed-off-by: Yiding Zhou <mailto:yidingx.zhou@intel.com>
> >
> >
> > I think you need to redesign the handshake if this the case.
> > Forcing 30 second delay at the end of all uses of pcap is not acceptable.
> 
> @Zhang, Qi Z Do we need to redesign the handshake to fix this?

Hi, Ferruh
Sorry for the late reply.
I did not receive your email on Oct 6, I got your comments from patchwork.

"Can you please provide more details on multi-process communication and 
call trace, to help us think about a solution to address this issue in a 
more generic way (not just for pcap but for any case device close takes 
more than multi-process timeout)?"

I try to explain this issue with a sequence diagram, hope it can be displayed correctly in the mail.

       thread                                 intr thread           intr thread             thread
    of secondary                       of secondary          of primary          of primary
             |                                              |                         |                          |
             |                                              |                         |                          |
rte_eal_hotplug_remove
rte_dev_remove
eal_dev_hotplug_request_to_primary
rte_mp_request_sync ------------------------------------------------------->|
                                                                                                                    |
                                                                                              handle_secondary_request
                                                                                         |<-----------------|
                                                                                         |
                                                                   __handle_secondary_request
                                                          eal_dev_hotplug_request_to_secondary
           |<------------------------------------- rte_mp_request_sync
           |
handle_primary_request--------->|
                                                           |
                            __handle_primary_request
                               local_dev_remove(this will take long time)
                                            rte_mp_reply -------------------------------->|                              
                                                                                         |
                                                                             local_dev_remove
          |<------------------------------------------------- rte_mp_reply

The marked 'local_dev_remove()' in the secondary process will perform a pcap file synchronization operation.
When the pcap file is too large, it will take a lot of time (according to my test, 100G takes 20+ seconds).
This causes the processing of the hot_plug message to time out.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] net/pcap: fix timeout of stopping device
  2022-11-22  9:25       ` Zhou, YidingX
@ 2022-11-22 17:28         ` Stephen Hemminger
  2022-12-02 10:22           ` Zhou, YidingX
  2022-11-29 14:11         ` Ferruh Yigit
  1 sibling, 1 reply; 20+ messages in thread
From: Stephen Hemminger @ 2022-11-22 17:28 UTC (permalink / raw)
  To: Zhou, YidingX; +Cc: ferruh.yigit, dev, Burakov, Anatoly, He, Xingguang, stable

On Tue, 22 Nov 2022 09:25:33 +0000
"Zhou, YidingX" <yidingx.zhou@intel.com> wrote:

> > -----Original Message-----
> > From: Zhou, YidingX <yidingx.zhou@intel.com>
> > Sent: Wednesday, September 21, 2022 3:15 PM
> > To: Stephen Hemminger <stephen@networkplumber.org>; Zhang, Qi Z
> > <qi.z.zhang@intel.com>
> > Cc: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>; He,
> > Xingguang <xingguang.he@intel.com>; stable@dpdk.org
> > Subject: RE: [PATCH v2] net/pcap: fix timeout of stopping device
> > 
> > 
> >   
> > > -----Original Message-----
> > > From: Stephen Hemminger <mailto:stephen@networkplumber.org>
> > > Sent: Tuesday, September 6, 2022 10:58 PM
> > > To: Zhou, YidingX <mailto:yidingx.zhou@intel.com>
> > > Cc: mailto:dev@dpdk.org; Zhang, Qi Z <mailto:qi.z.zhang@intel.com>; Burakov, Anatoly
> > > <mailto:anatoly.burakov@intel.com>; He, Xingguang <mailto:xingguang.he@intel.com>;
> > > mailto:stable@dpdk.org
> > > Subject: Re: [PATCH v2] net/pcap: fix timeout of stopping device
> > >
> > > On Tue,  6 Sep 2022 16:05:11 +0800
> > > Yiding Zhou <mailto:yidingx.zhou@intel.com> wrote:
> > >  
> > > > The pcap file will be synchronized to the disk when stopping the device.
> > > > It takes a long time if the file is large that would cause the
> > > > 'detach sync request' timeout when the device is closed under
> > > > multi-process scenario.
> > > >
> > > > This commit fixes the issue by using alarm handler to release dumper.
> > > >
> > > > Fixes: 0ecfb6c04d54 ("net/pcap: move handler to process private")
> > > > Cc: mailto:stable@dpdk.org
> > > >
> > > > Signed-off-by: Yiding Zhou <mailto:yidingx.zhou@intel.com>  
> > >
> > >
> > > I think you need to redesign the handshake if this the case.
> > > Forcing 30 second delay at the end of all uses of pcap is not acceptable.  
> > 
> > @Zhang, Qi Z Do we need to redesign the handshake to fix this?  
> 
> Hi, Ferruh
> Sorry for the late reply.
> I did not receive your email on Oct 6, I got your comments from patchwork.
> 
> "Can you please provide more details on multi-process communication and 
> call trace, to help us think about a solution to address this issue in a 
> more generic way (not just for pcap but for any case device close takes 
> more than multi-process timeout)?"
> 
> I try to explain this issue with a sequence diagram, hope it can be displayed correctly in the mail.
> 
>        thread                                 intr thread           intr thread             thread
>     of secondary                       of secondary          of primary          of primary
>              |                                              |                         |                          |
>              |                                              |                         |                          |
> rte_eal_hotplug_remove
> rte_dev_remove
> eal_dev_hotplug_request_to_primary
> rte_mp_request_sync ------------------------------------------------------->|
>                                                                                                                     |
>                                                                                               handle_secondary_request
>                                                                                          |<-----------------|
>                                                                                          |
>                                                                    __handle_secondary_request
>                                                           eal_dev_hotplug_request_to_secondary
>            |<------------------------------------- rte_mp_request_sync
>            |
> handle_primary_request--------->|
>                                                            |
>                             __handle_primary_request
>                                local_dev_remove(this will take long time)
>                                             rte_mp_reply -------------------------------->|                              
>                                                                                          |
>                                                                              local_dev_remove
>           |<------------------------------------------------- rte_mp_reply
> 
> The marked 'local_dev_remove()' in the secondary process will perform a pcap file synchronization operation.
> When the pcap file is too large, it will take a lot of time (according to my test 100G takes 20+ seconds).
> This caused the processing of hot_plug message to time out.


Part of the problem may be a hidden file sync in some library.
Normally, closing a file should be fast even with lots of outstanding data.
The actual write done by OS will continue from file cache.

I wonder if doing some kind of fadvise call might help; see POSIX_FADV_SEQUENTIAL or POSIX_FADV_DONTNEED.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] net/pcap: fix timeout of stopping device
  2022-11-22  9:25       ` Zhou, YidingX
  2022-11-22 17:28         ` Stephen Hemminger
@ 2022-11-29 14:11         ` Ferruh Yigit
  2022-12-02 10:13           ` Zhou, YidingX
  1 sibling, 1 reply; 20+ messages in thread
From: Ferruh Yigit @ 2022-11-29 14:11 UTC (permalink / raw)
  To: Zhou, YidingX
  Cc: dev, Burakov, Anatoly, He, Xingguang, stable, Stephen Hemminger

On 11/22/2022 9:25 AM, Zhou, YidingX wrote:
> 
> 
>> -----Original Message-----
>> From: Zhou, YidingX <yidingx.zhou@intel.com>
>> Sent: Wednesday, September 21, 2022 3:15 PM
>> To: Stephen Hemminger <stephen@networkplumber.org>; Zhang, Qi Z
>> <qi.z.zhang@intel.com>
>> Cc: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>; He,
>> Xingguang <xingguang.he@intel.com>; stable@dpdk.org
>> Subject: RE: [PATCH v2] net/pcap: fix timeout of stopping device
>>
>>
>>
>>> -----Original Message-----
>>> From: Stephen Hemminger <mailto:stephen@networkplumber.org>
>>> Sent: Tuesday, September 6, 2022 10:58 PM
>>> To: Zhou, YidingX <mailto:yidingx.zhou@intel.com>
>>> Cc: mailto:dev@dpdk.org; Zhang, Qi Z <mailto:qi.z.zhang@intel.com>; Burakov, Anatoly
>>> <mailto:anatoly.burakov@intel.com>; He, Xingguang <mailto:xingguang.he@intel.com>;
>>> mailto:stable@dpdk.org
>>> Subject: Re: [PATCH v2] net/pcap: fix timeout of stopping device
>>>
>>> On Tue,  6 Sep 2022 16:05:11 +0800
>>> Yiding Zhou <mailto:yidingx.zhou@intel.com> wrote:
>>>
>>>> The pcap file will be synchronized to the disk when stopping the device.
>>>> It takes a long time if the file is large that would cause the
>>>> 'detach sync request' timeout when the device is closed under
>>>> multi-process scenario.
>>>>
>>>> This commit fixes the issue by using alarm handler to release dumper.
>>>>
>>>> Fixes: 0ecfb6c04d54 ("net/pcap: move handler to process private")
>>>> Cc: mailto:stable@dpdk.org
>>>>
>>>> Signed-off-by: Yiding Zhou <mailto:yidingx.zhou@intel.com>
>>>
>>>
>>> I think you need to redesign the handshake if this the case.
>>> Forcing 30 second delay at the end of all uses of pcap is not acceptable.
>>
>> @Zhang, Qi Z Do we need to redesign the handshake to fix this?
> 
> Hi, Ferruh
> Sorry for the late reply.
> I did not receive your email on Oct 6, I got your comments from patchwork.
> 
> "Can you please provide more details on multi-process communication and 
> call trace, to help us think about a solution to address this issue in a 
> more generic way (not just for pcap but for any case device close takes 
> more than multi-process timeout)?"
> 
> I try to explain this issue with a sequence diagram, hope it can be displayed correctly in the mail.
> 
>        thread                                 intr thread           intr thread             thread
>     of secondary                       of secondary          of primary          of primary
>              |                                              |                         |                          |
>              |                                              |                         |                          |
> rte_eal_hotplug_remove
> rte_dev_remove
> eal_dev_hotplug_request_to_primary
> rte_mp_request_sync ------------------------------------------------------->|
>                                                                                                                     |
>                                                                                               handle_secondary_request
>                                                                                          |<-----------------|
>                                                                                          |
>                                                                    __handle_secondary_request
>                                                           eal_dev_hotplug_request_to_secondary
>            |<------------------------------------- rte_mp_request_sync
>            |
> handle_primary_request--------->|
>                                                            |
>                             __handle_primary_request
>                                local_dev_remove(this will take long time)
>                                             rte_mp_reply -------------------------------->|                              
>                                                                                          |
>                                                                              local_dev_remove
>           |<------------------------------------------------- rte_mp_reply
> 
> The marked 'local_dev_remove()' in the secondary process will perform a pcap file synchronization operation.
> When the pcap file is too large, it will take a lot of time (according to my test 100G takes 20+ seconds).
> This caused the processing of hot_plug message to time out.

Hi Yiding,

Thanks for the information,

Right now the timeout for all MP operations is hardcoded in the code at 5
seconds.
Do you think it would work to have an API to set a custom timeout,
something like `rte_mp_timeout_set()`, and call this from pdump?

This gives a generic solution for similar cases, not just for pcap.
But my concern is that this may be too much multi-process related internal
detail to expose; @Anatoly may comment on this.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH v2] net/pcap: fix timeout of stopping device
  2022-11-29 14:11         ` Ferruh Yigit
@ 2022-12-02 10:13           ` Zhou, YidingX
  2022-12-02 11:19             ` Ferruh Yigit
  0 siblings, 1 reply; 20+ messages in thread
From: Zhou, YidingX @ 2022-12-02 10:13 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: dev, Burakov, Anatoly, He, Xingguang, stable, Stephen Hemminger



> >>> On Tue,  6 Sep 2022 16:05:11 +0800
> >>> Yiding Zhou <mailto:yidingx.zhou@intel.com> wrote:
> >>>
> >>>> The pcap file will be synchronized to the disk when stopping the device.
> >>>> It takes a long time if the file is large that would cause the
> >>>> 'detach sync request' timeout when the device is closed under
> >>>> multi-process scenario.
> >>>>
> >>>> This commit fixes the issue by using alarm handler to release dumper.
> >>>>
> >>>> Fixes: 0ecfb6c04d54 ("net/pcap: move handler to process private")
> >>>> Cc: mailto:stable@dpdk.org
> >>>>
> >>>> Signed-off-by: Yiding Zhou <mailto:yidingx.zhou@intel.com>
> >>>
> >>>
> >>> I think you need to redesign the handshake if this the case.
> >>> Forcing 30 second delay at the end of all uses of pcap is not acceptable.
> >>
> >> @Zhang, Qi Z Do we need to redesign the handshake to fix this?
> >
> > Hi, Ferruh
> > Sorry for the late reply.
> > I did not receive your email on Oct 6, I got your comments from patchwork.
> >
> > "Can you please provide more details on multi-process communication
> > and call trace, to help us think about a solution to address this
> > issue in a more generic way (not just for pcap but for any case device
> > close takes more than multi-process timeout)?"
> >
> > I try to explain this issue with a sequence diagram, hope it can be displayed
> correctly in the mail.
> >
> >        thread                                 intr thread           intr thread             thread
> >     of secondary                       of secondary          of primary          of primary
> >              |                                              |                         |                          |
> >              |                                              |                         |                          |
> > rte_eal_hotplug_remove
> > rte_dev_remove
> > eal_dev_hotplug_request_to_primary
> > rte_mp_request_sync ------------------------------------------------------->|
> >                                                                                                                     |
> >
> handle_secondary_request
> >                                                                                          |<-----------------|
> >                                                                                          |
> >                                                                    __handle_secondary_request
> >                                                           eal_dev_hotplug_request_to_secondary
> >            |<------------------------------------- rte_mp_request_sync
> >            |
> > handle_primary_request--------->|
> >                                                            |
> >                             __handle_primary_request
> >                                local_dev_remove(this will take long time)
> >                                             rte_mp_reply -------------------------------->|
> >                                                                                          |
> >                                                                              local_dev_remove
> >           |<-------------------------------------------------
> > rte_mp_reply
> >
> > The marked 'local_dev_remove()' in the secondary process will perform a
> pcap file synchronization operation.
> > When the pcap file is too large, it will take a lot of time (according to my test
> 100G takes 20+ seconds).
> > This caused the processing of hot_plug message to time out.
> 
> Hi Yiding,
> 
> Thanks for the information,
> 
> Right now all MP operations timeout is hardcoded in the code and it is 5
> seconds.
> Do you think does it work to have an API to set custom timeout, something like
> `rte_mp_timeout_set()`, and call this from pdump?
> 
> This gives a generic solution for similar cases, not just for pcap.
> But my concern is if this is too much multi-process related internal detail to
> update, @Anatoly may comment on this.

Hi, Ferruh
For the pdump case only, I think the timeout is affected by the pcap file's size and other system factors, such as the type of FS and the system memory size.
It may be difficult to predict a specific timeout value to set.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH v2] net/pcap: fix timeout of stopping device
  2022-11-22 17:28         ` Stephen Hemminger
@ 2022-12-02 10:22           ` Zhou, YidingX
  0 siblings, 0 replies; 20+ messages in thread
From: Zhou, YidingX @ 2022-12-02 10:22 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: ferruh.yigit, dev, Burakov, Anatoly, He, Xingguang, stable

> > > > On Tue,  6 Sep 2022 16:05:11 +0800 Yiding Zhou
> > > > <mailto:yidingx.zhou@intel.com> wrote:
> > > >
> > > > > The pcap file will be synchronized to the disk when stopping the device.
> > > > > It takes a long time if the file is large that would cause the
> > > > > 'detach sync request' timeout when the device is closed under
> > > > > multi-process scenario.
> > > > >
> > > > > This commit fixes the issue by using alarm handler to release dumper.
> > > > >
> > > > > Fixes: 0ecfb6c04d54 ("net/pcap: move handler to process
> > > > > private")
> > > > > Cc: mailto:stable@dpdk.org
> > > > >
> > > > > Signed-off-by: Yiding Zhou <mailto:yidingx.zhou@intel.com>
> > > >
> > > >
> > > > I think you need to redesign the handshake if this the case.
> > > > Forcing 30 second delay at the end of all uses of pcap is not acceptable.
> > >
> > > @Zhang, Qi Z Do we need to redesign the handshake to fix this?
> >
> > Hi, Ferruh
> > Sorry for the late reply.
> > I did not receive your email on Oct 6, I got your comments from patchwork.
> >
> > "Can you please provide more details on multi-process communication
> > and call trace, to help us think about a solution to address this
> > issue in a more generic way (not just for pcap but for any case device
> > close takes more than multi-process timeout)?"
> >
> > I try to explain this issue with a sequence diagram, hope it can be displayed
> correctly in the mail.
> >
> >        thread                                 intr thread           intr thread             thread
> >     of secondary                       of secondary          of primary          of primary
> >              |                                              |                         |                          |
> >              |                                              |                         |                          |
> > rte_eal_hotplug_remove
> > rte_dev_remove
> > eal_dev_hotplug_request_to_primary
> > rte_mp_request_sync ------------------------------------------------------->|
> >                                                                                                                     |
> >
> handle_secondary_request
> >                                                                                          |<-----------------|
> >                                                                                          |
> >                                                                    __handle_secondary_request
> >                                                           eal_dev_hotplug_request_to_secondary
> >            |<------------------------------------- rte_mp_request_sync
> >            |
> > handle_primary_request--------->|
> >                                                            |
> >                             __handle_primary_request
> >                                local_dev_remove(this will take long time)
> >                                             rte_mp_reply -------------------------------->|
> >                                                                                          |
> >                                                                              local_dev_remove
> >           |<-------------------------------------------------
> > rte_mp_reply
> >
> > The marked 'local_dev_remove()' in the secondary process will perform a
> pcap file synchronization operation.
> > When the pcap file is too large, it will take a lot of time (according to my test
> 100G takes 20+ seconds).
> > This caused the processing of hot_plug message to time out.
> 
> 
> Part of the problem maybe a hidden file sync in some library.
> Normally, closing a file should be fast even with lots of outstanding data.
> The actual write done by OS will continue from file cache.
> 
> I wonder if doing some kind of fadvise call might help see
> POSIX_FADV_SEQUENTIAL or POSIX_FADV_DONTNEED

Thanks for your advice, I will try it and give you feedback.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] net/pcap: fix timeout of stopping device
  2022-12-02 10:13           ` Zhou, YidingX
@ 2022-12-02 11:19             ` Ferruh Yigit
  2022-12-05  1:58               ` Zhou, YidingX
  0 siblings, 1 reply; 20+ messages in thread
From: Ferruh Yigit @ 2022-12-02 11:19 UTC (permalink / raw)
  To: Zhou, YidingX
  Cc: dev, Burakov, Anatoly, He, Xingguang, stable, Stephen Hemminger

On 12/2/2022 10:13 AM, Zhou, YidingX wrote:
> 
> 
>>>>> On Tue,  6 Sep 2022 16:05:11 +0800
>>>>> Yiding Zhou <mailto:yidingx.zhou@intel.com> wrote:
>>>>>
>>>>>> The pcap file will be synchronized to the disk when stopping the device.
>>>>>> It takes a long time if the file is large that would cause the
>>>>>> 'detach sync request' timeout when the device is closed under
>>>>>> multi-process scenario.
>>>>>>
>>>>>> This commit fixes the issue by using alarm handler to release dumper.
>>>>>>
>>>>>> Fixes: 0ecfb6c04d54 ("net/pcap: move handler to process private")
>>>>>> Cc: mailto:stable@dpdk.org
>>>>>>
>>>>>> Signed-off-by: Yiding Zhou <mailto:yidingx.zhou@intel.com>
>>>>>
>>>>>
>>>>> I think you need to redesign the handshake if this is the case.
>>>>> Forcing a 30 second delay at the end of all uses of pcap is not acceptable.
>>>>
>>>> @Zhang, Qi Z Do we need to redesign the handshake to fix this?
>>>
>>> Hi, Ferruh
>>> Sorry for the late reply.
>>> I did not receive your email on Oct 6, I got your comments from patchwork.
>>>
>>> "Can you please provide more details on multi-process communication
>>> and call trace, to help us think about a solution to address this
>>> issue in a more generic way (not just for pcap but for any case device
>>> close takes more than multi-process timeout)?"
>>>
>>> I try to explain this issue with a sequence diagram, hope it can be displayed
>> correctly in the mail.
>>>
>>>        thread                                 intr thread           intr thread             thread
>>>     of secondary                       of secondary          of primary          of primary
>>>              |                                              |                         |                          |
>>>              |                                              |                         |                          |
>>> rte_eal_hotplug_remove
>>> rte_dev_remove
>>> eal_dev_hotplug_request_to_primary
>>> rte_mp_request_sync ------------------------------------------------------->|
>>>                                                                                                                     |
>>>
>> handle_secondary_request
>>>                                                                                          |<-----------------|
>>>                                                                                          |
>>>                                                                    __handle_secondary_request
>>>                                                           eal_dev_hotplug_request_to_secondary
>>>            |<------------------------------------- rte_mp_request_sync
>>>            |
>>> handle_primary_request--------->|
>>>                                                            |
>>>                             __handle_primary_request
>>>                                local_dev_remove(this will take long time)
>>>                                             rte_mp_reply -------------------------------->|
>>>                                                                                          |
>>>                                                                              local_dev_remove
>>>           |<-------------------------------------------------
>>> rte_mp_reply
>>>
>>> The marked 'local_dev_remove()' in the secondary process will perform a
>> pcap file synchronization operation.
>>> When the pcap file is too large, it will take a lot of time (according to my test
>> 100G takes 20+ seconds).
>>> This caused the processing of hot_plug message to time out.
>>
>> Hi Yiding,
>>
>> Thanks for the information,
>>
>> Right now all MP operations timeout is hardcoded in the code and it is 5
>> seconds.
>> Do you think it would work to have an API to set a custom timeout, something like
>> `rte_mp_timeout_set()`, and call this from pdump?
>>
>> This gives a generic solution for similar cases, not just for pcap.
>> But my concern is if this is too much multi-process related internal detail to
>> update, @Anatoly may comment on this.
> 
> Hi, Ferruh
> For the pdump case only, I think the timeout is affected by the pcap file's size and other system factors, such as the type of FS and the amount of system memory.
> It may be difficult to predict a specific time value to set.

It doesn't have to be specific.

The point here is to have a multi-process API to set the timeout, instead of putting
a hardcoded timeout in the pcap PMD.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH v2] net/pcap: fix timeout of stopping device
  2022-12-02 11:19             ` Ferruh Yigit
@ 2022-12-05  1:58               ` Zhou, YidingX
  0 siblings, 0 replies; 20+ messages in thread
From: Zhou, YidingX @ 2022-12-05  1:58 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: dev, Burakov, Anatoly, He, Xingguang, stable, Stephen Hemminger

> >>>>> On Tue,  6 Sep 2022 16:05:11 +0800 Yiding Zhou
> >>>>> <mailto:yidingx.zhou@intel.com> wrote:
> >>>>>
> >>>>>> The pcap file will be synchronized to the disk when stopping the device.
> >>>>>> It takes a long time if the file is large that would cause the
> >>>>>> 'detach sync request' timeout when the device is closed under
> >>>>>> multi-process scenario.
> >>>>>>
> >>>>>> This commit fixes the issue by using alarm handler to release dumper.
> >>>>>>
> >>>>>> Fixes: 0ecfb6c04d54 ("net/pcap: move handler to process private")
> >>>>>> Cc: mailto:stable@dpdk.org
> >>>>>>
> >>>>>> Signed-off-by: Yiding Zhou <mailto:yidingx.zhou@intel.com>
> >>>>>
> >>>>>
> >>>>> I think you need to redesign the handshake if this the case.
> >>>>> Forcing 30 second delay at the end of all uses of pcap is not acceptable.
> >>>>
> >>>> @Zhang, Qi Z Do we need to redesign the handshake to fix this?
> >>>
> >>> Hi, Ferruh
> >>> Sorry for the late reply.
> >>> I did not receive your email on Oct 6, I got your comments from patchwork.
> >>>
> >>> "Can you please provide more details on multi-process communication
> >>> and call trace, to help us think about a solution to address this
> >>> issue in a more generic way (not just for pcap but for any case
> >>> device close takes more than multi-process timeout)?"
> >>>
> >>> I try to explain this issue with a sequence diagram, hope it can be
> >>> displayed
> >> correctly in the mail.
> >>>
> >>>        thread                                 intr thread           intr thread             thread
> >>>     of secondary                       of secondary          of primary          of
> primary
> >>>              |                                              |                         |                          |
> >>>              |                                              |                         |                          |
> >>> rte_eal_hotplug_remove
> >>> rte_dev_remove
> >>> eal_dev_hotplug_request_to_primary
> >>> rte_mp_request_sync ------------------------------------------------------->|
> >>>
> >>> |
> >>>
> >> handle_secondary_request
> >>>                                                                                          |<-----------------|
> >>>                                                                                          |
> >>>                                                                    __handle_secondary_request
> >>>
> eal_dev_hotplug_request_to_secondary
> >>>            |<------------------------------------- rte_mp_request_sync
> >>>            |
> >>> handle_primary_request--------->|
> >>>                                                            |
> >>>                             __handle_primary_request
> >>>                                local_dev_remove(this will take long time)
> >>>                                             rte_mp_reply -------------------------------->|
> >>>                                                                                          |
> >>>                                                                              local_dev_remove
> >>>           |<-------------------------------------------------
> >>> rte_mp_reply
> >>>
> >>> The marked 'local_dev_remove()' in the secondary process will
> >>> perform a
> >> pcap file synchronization operation.
> >>> When the pcap file is too large, it will take a lot of time
> >>> (according to my test
> >> 100G takes 20+ seconds).
> >>> This caused the processing of hot_plug message to time out.
> >>
> >> Hi Yiding,
> >>
> >> Thanks for the information,
> >>
> >> Right now all MP operations timeout is hardcoded in the code and it
> >> is 5 seconds.
> >> Do you think does it work to have an API to set custom timeout,
> >> something like `rte_mp_timeout_set()`, and call this from pdump?
> >>
> >> This gives a generic solution for similar cases, not just for pcap.
> >> But my concern is if this is too much multi-process related internal
> >> detail to update, @Anatoly may comment on this.
> >
> > Hi, Ferruh
> > For pdump case only, I think the timeout is affected by pcap's size and other
> system components, such as the type of FS, system memory size.
> > It may be difficult to predict the specific time value for setting.
> 
> It doesn't have to be specific.
> 
> The point here is to have a multi-process API to set the timeout, instead of putting a
> hardcoded timeout in the pcap PMD.

OK, I understood.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] net/pcap: reduce time for stopping device
@ 2022-08-25  6:27 Yiding Zhou
  0 siblings, 0 replies; 20+ messages in thread
From: Yiding Zhou @ 2022-08-25  6:27 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, stable@dpdk.org, Yiding Zhou, stable

The pcap file will be synchronized to the disk when stopping the device.
It takes a long time if the file is large, which would cause the
'detach sync request' to time out when the device is closed in a multi-process
scenario.

This commit fixes the issue by performing synchronization in the Tx path.

Fixes: 4c173302c307 ("pcap: add new driver")
Cc: stable@dpdk.org

Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
---
 drivers/net/pcap/pcap_ethdev.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index ec29fd6bc5..52eafa5674 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -3,7 +3,7 @@
  * Copyright(c) 2014 6WIND S.A.
  * All rights reserved.
  */
-
+#include <unistd.h>
 #include <time.h>
 
 #include <pcap.h>
@@ -38,6 +38,8 @@
 
 #define RTE_PMD_PCAP_MAX_QUEUES 16
 
+#define ETH_PCAP_SYNC_THRESHOLD 0x20000000
+
 static char errbuf[PCAP_ERRBUF_SIZE];
 static struct timespec start_time;
 static uint64_t start_cycles;
@@ -47,6 +49,8 @@ static uint8_t iface_idx;
 static uint64_t timestamp_rx_dynflag;
 static int timestamp_dynfield_offset = -1;
 
+RTE_DEFINE_PER_LCORE(uint64_t, _pcap_cached_bytes);
+
 struct queue_stat {
 	volatile unsigned long pkts;
 	volatile unsigned long bytes;
@@ -144,6 +148,16 @@ static struct rte_eth_link pmd_link = {
 
 RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
 
+static inline void
+pcap_dumper_data_sync(pcap_dumper_t *dumper, uint32_t bytes)
+{
+	RTE_PER_LCORE(_pcap_cached_bytes) += bytes;
+	if (unlikely(RTE_PER_LCORE(_pcap_cached_bytes) > ETH_PCAP_SYNC_THRESHOLD)) {
+		if (!fdatasync(fileno(pcap_dump_file(dumper))))
+			RTE_PER_LCORE(_pcap_cached_bytes) = 0;
+	}
+}
+
 static struct queue_missed_stat*
 queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
 {
@@ -421,7 +435,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	 * process stops and to make sure the pcap file is actually written,
 	 * we flush the pcap dumper within each burst.
 	 */
-	pcap_dump_flush(dumper);
+	pcap_dumper_data_sync(dumper, tx_bytes);
 	dumper_q->tx_stat.pkts += num_tx;
 	dumper_q->tx_stat.bytes += tx_bytes;
 	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2022-12-05  1:58 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-25  7:20 [PATCH] net/pcap: reduce time for stopping device Yiding Zhou
2022-08-25 10:09 ` Ferruh Yigit
2022-08-25 11:17   ` Zhou, YidingX
2022-08-25 12:21     ` Ferruh Yigit
2022-08-29 11:50       ` Zhou, YidingX
2022-08-31 16:42         ` Stephen Hemminger
2022-09-01  7:40         ` Zhou, YidingX
2022-09-06  8:05 ` [PATCH v2] net/pcap: fix timeout of " Yiding Zhou
2022-09-06 14:57   ` Stephen Hemminger
2022-09-06 16:21     ` Zhou, YidingX
2022-09-21  7:14     ` Zhou, YidingX
2022-10-03 15:00       ` Ferruh Yigit
2022-11-22  9:25       ` Zhou, YidingX
2022-11-22 17:28         ` Stephen Hemminger
2022-12-02 10:22           ` Zhou, YidingX
2022-11-29 14:11         ` Ferruh Yigit
2022-12-02 10:13           ` Zhou, YidingX
2022-12-02 11:19             ` Ferruh Yigit
2022-12-05  1:58               ` Zhou, YidingX
  -- strict thread matches above, loose matches on Subject: below --
2022-08-25  6:27 [PATCH] net/pcap: reduce time for " Yiding Zhou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).