patches for DPDK stable branches
 help / color / mirror / Atom feed
* [PATCH] net/pcap: reduce time for stopping device
@ 2022-08-25  6:27 Yiding Zhou
  0 siblings, 0 replies; 8+ messages in thread
From: Yiding Zhou @ 2022-08-25  6:27 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, stable.dpdk.org, Yiding Zhou, stable

The pcap file will be synchronized to the disk when stopping the device.
It takes a long time if the file is large that would cause the
'detach sync request' timeout when the device is closed under multi-process
scenario.

This commit fixes the issue by performing synchronization in Tx path

Fixes: 4c173302c307 ("pcap: add new driver")
Cc: stable@dpdk.org

Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
---
 drivers/net/pcap/pcap_ethdev.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index ec29fd6bc5..52eafa5674 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -3,7 +3,7 @@
  * Copyright(c) 2014 6WIND S.A.
  * All rights reserved.
  */
-
+#include <unistd.h>
 #include <time.h>
 
 #include <pcap.h>
@@ -38,6 +38,8 @@
 
 #define RTE_PMD_PCAP_MAX_QUEUES 16
 
+#define ETH_PCAP_SYNC_THRESHOLD 0x20000000
+
 static char errbuf[PCAP_ERRBUF_SIZE];
 static struct timespec start_time;
 static uint64_t start_cycles;
@@ -47,6 +49,8 @@ static uint8_t iface_idx;
 static uint64_t timestamp_rx_dynflag;
 static int timestamp_dynfield_offset = -1;
 
+RTE_DEFINE_PER_LCORE(uint64_t, _pcap_cached_bytes);
+
 struct queue_stat {
 	volatile unsigned long pkts;
 	volatile unsigned long bytes;
@@ -144,6 +148,16 @@ static struct rte_eth_link pmd_link = {
 
 RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
 
+static inline void
+pcap_dumper_data_sync(pcap_dumper_t *dumper, uint32_t bytes)
+{
+	RTE_PER_LCORE(_pcap_cached_bytes) += bytes;
+	if (unlikely(RTE_PER_LCORE(_pcap_cached_bytes) > ETH_PCAP_SYNC_THRESHOLD)) {
+		if (!fdatasync(fileno(pcap_dump_file(dumper))))
+			RTE_PER_LCORE(_pcap_cached_bytes) = 0;
+	}
+}
+
 static struct queue_missed_stat*
 queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
 {
@@ -421,7 +435,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	 * process stops and to make sure the pcap file is actually written,
 	 * we flush the pcap dumper within each burst.
 	 */
-	pcap_dump_flush(dumper);
+	pcap_dumper_data_sync(dumper, tx_bytes);
 	dumper_q->tx_stat.pkts += num_tx;
 	dumper_q->tx_stat.bytes += tx_bytes;
 	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] net/pcap: reduce time for stopping device
  2022-08-29 11:50       ` Zhou, YidingX
  2022-08-31 16:42         ` Stephen Hemminger
@ 2022-09-01  7:40         ` Zhou, YidingX
  1 sibling, 0 replies; 8+ messages in thread
From: Zhou, YidingX @ 2022-09-01  7:40 UTC (permalink / raw)
  To: Ferruh Yigit, dev; +Cc: Zhang, Qi Z, stable

Hi, Ferruh

> > >>> The pcap file will be synchronized to the disk when stopping the device.
> > >>> It takes a long time if the file is large that would cause the
> > >>> 'detach sync request' timeout when the device is closed under
> > >>> multi-process scenario.
> > >>>
> > >>> This commit fixes the issue by performing synchronization in Tx
> > >>> path
> > >>>
> > >>> Fixes: 4c173302c307 ("pcap: add new driver")
> > >>> Cc: stable@dpdk.org
> > >>>
> > >>> Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
> > >>> ---
> > >>>    drivers/net/pcap/pcap_ethdev.c | 18 ++++++++++++++++--
> > >>>    1 file changed, 16 insertions(+), 2 deletions(-)
> > >>>
> > >>> diff --git a/drivers/net/pcap/pcap_ethdev.c
> > >>> b/drivers/net/pcap/pcap_ethdev.c index ec29fd6bc5..52eafa5674
> > >>> 100644
> > >>> --- a/drivers/net/pcap/pcap_ethdev.c
> > >>> +++ b/drivers/net/pcap/pcap_ethdev.c
> > >>> @@ -3,7 +3,7 @@
> > >>>     * Copyright(c) 2014 6WIND S.A.
> > >>>     * All rights reserved.
> > >>>     */
> > >>> -
> > >>> +#include <unistd.h>
> > >>>    #include <time.h>
> > >>>
> > >>>    #include <pcap.h>
> > >>> @@ -38,6 +38,8 @@
> > >>>
> > >>>    #define RTE_PMD_PCAP_MAX_QUEUES 16
> > >>>
> > >>> +#define ETH_PCAP_SYNC_THRESHOLD 0x20000000
> > >>> +
> >
> > I guess this is 512MB, can you please comment this.
> > Is there any specific reason to select this value, or is it arbitrary?
> >
> >
> 
> 512M is arbitrary, because there is no API to get the disk cache size associated
> with a specific file.
> I will test the performance impact of different values.
> 
> > >>>    static char errbuf[PCAP_ERRBUF_SIZE];
> > >>>    static struct timespec start_time;
> > >>>    static uint64_t start_cycles;
> > >>> @@ -47,6 +49,8 @@ static uint8_t iface_idx;
> > >>>    static uint64_t timestamp_rx_dynflag;
> > >>>    static int timestamp_dynfield_offset = -1;
> > >>>
> > >>> +RTE_DEFINE_PER_LCORE(uint64_t, _pcap_cached_bytes);
> > >>> +
> > >>>    struct queue_stat {
> > >>>    	volatile unsigned long pkts;
> > >>>    	volatile unsigned long bytes;
> > >>> @@ -144,6 +148,16 @@ static struct rte_eth_link pmd_link = {
> > >>>
> > >>>    RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
> > >>>
> > >>> +static inline void
> > >>> +pcap_dumper_data_sync(pcap_dumper_t *dumper, uint32_t bytes) {
> >
> > 'pcap_' is the namespace for the libpcap, can you select another
> > prefix, like 'eth_pcap_' as many driver functions does.
> >
> > >>> +	RTE_PER_LCORE(_pcap_cached_bytes) += bytes;
> > >>> +	if (unlikely(RTE_PER_LCORE(_pcap_cached_bytes) >
> > >> ETH_PCAP_SYNC_THRESHOLD)) {
> > >>> +		if (!fdatasync(fileno(pcap_dump_file(dumper))))
> > >>> +			RTE_PER_LCORE(_pcap_cached_bytes) = 0;
> > >>> +	}
> > >>> +}
> > >>> +
> >
> > pcap supports multiple queue, and each queue creates a new pcap dumper
> > and single core/thread can be used for this multiple dumpers. In that
> > case I think above per lcore variable logic doesn't work.
> >
> > And instead of having a global value, what do you think to add a
> > variable to 'struct pcap_tx_queue' for this purpose?
> >
> 
> Thanks for the comments, I will follow this.
> 
> > >>>    static struct queue_missed_stat*
> > >>>    queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
> > >>>    {
> > >>> @@ -421,7 +435,7 @@ eth_pcap_tx_dumper(void *queue, struct
> > >>> rte_mbuf
> > >> **bufs, uint16_t nb_pkts)
> > >>>    	 * process stops and to make sure the pcap file is actually written,
> > >>>    	 * we flush the pcap dumper within each burst.
> > >>>    	 */
> > >>> -	pcap_dump_flush(dumper);
> > >>> +	pcap_dumper_data_sync(dumper, tx_bytes);
> > >>
> > >> 'pcap_dump_flush()' should be doing the same thing, to write buffer
> > >> to file, isn't it working?
> > >>
> > >> Can you check the return value of the 'pcap_dump_flush()' API, I
> > >> wonder if it keeps failing, for some reason?
> > >>
> > >
> > > 'pcap_dump_flush()' returns 0 each time without error, it calls
> > > 'fflush()' to
> > flush userspace buffers to kernel buffers, not disk. 'fdatasync()' to
> > ensure data is written to disk.
> > >
> >
> > 'pcap_dump_flush()' API documentation says "flushes the output buffer
> > to the ``savefile,''", but as you said it uses 'fflush()' internally,
> > so there is a chance that data is not written to the disk.
> >
> > In this case, won't need to keep both, first flush and later fsync/fdatasync?
> >
> 
> I draw a diagram to describe it more clearly
> 
>  fwrite                             fclose/fflush                                                   fclose/fdatasync
> --------->| libc buffer  |----------------> |    disk cache in RAM          |--------------------->
> |disk|
>               | 4096 Bytes |                         | size is determined by OS |
> |       |
> 
> When the libc buffer is full, the system will automatically sync it to the disk
> cache.
> It is easily full as it's only 4096 B size. so there is no need to call 'fflush()' every
> time.
> The real time consuming action is syncing the disk cache to disk.
> Because the disk cache is very large, it will take a long time to sync all at one
> time during 'fclose()', so need to call 'fdatasync()' periodically to amortize the
> time.
> 
> > Do you observe any performance difference after change, since now
> > writing to actual disk on datapath?
> >
> 
> I will verify any performance difference under these 2 scenarios.
> 
I have done some tests, using 'fdatasync()' will cause the packet loss rate to increase by 15%-20%, 
It's not a good solution, need to rework. I plan to use a timer to call pcap_dump_close to fix this, 
do you have any suggestion?
thanks
> > >>>    	dumper_q->tx_stat.pkts += num_tx;
> > >>>    	dumper_q->tx_stat.bytes += tx_bytes;
> > >>>    	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
> > >


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] net/pcap: reduce time for stopping device
  2022-08-29 11:50       ` Zhou, YidingX
@ 2022-08-31 16:42         ` Stephen Hemminger
  2022-09-01  7:40         ` Zhou, YidingX
  1 sibling, 0 replies; 8+ messages in thread
From: Stephen Hemminger @ 2022-08-31 16:42 UTC (permalink / raw)
  To: Zhou, YidingX; +Cc: Ferruh Yigit, dev, Zhang, Qi Z, stable

On Mon, 29 Aug 2022 11:50:29 +0000
"Zhou, YidingX" <yidingx.zhou@intel.com> wrote:

> I draw a diagram to describe it more clearly
> 
>  fwrite                             fclose/fflush                                                   fclose/fdatasync
> --------->| libc buffer  |----------------> |    disk cache in RAM          |---------------------> |disk|  
>               | 4096 Bytes |                         | size is determined by OS |                                |       |
> 
> When the libc buffer is full, the system will automatically sync it to the disk cache.
> It is easily full as it's only 4096 B size. so there is no need to call 'fflush()' every time.
> The real time consuming action is syncing the disk cache to disk.
> Because the disk cache is very large, it will take a long time to sync all at one time during 'fclose()',
> so need to call 'fdatasync()' periodically to amortize the time.

If you want to speed up this, then get rid of stdio and use a faster
API like io_uring. What you tried can help but using a better API
would make bigger gains.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] net/pcap: reduce time for stopping device
  2022-08-25 12:21     ` Ferruh Yigit
@ 2022-08-29 11:50       ` Zhou, YidingX
  2022-08-31 16:42         ` Stephen Hemminger
  2022-09-01  7:40         ` Zhou, YidingX
  0 siblings, 2 replies; 8+ messages in thread
From: Zhou, YidingX @ 2022-08-29 11:50 UTC (permalink / raw)
  To: Ferruh Yigit, dev; +Cc: Zhang, Qi Z, stable

> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> Sent: Thursday, August 25, 2022 8:22 PM
> To: Zhou, YidingX <yidingx.zhou@intel.com>; dev@dpdk.org
> Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; stable@dpdk.org
> Subject: Re: [PATCH] net/pcap: reduce time for stopping device
> 
> On 8/25/2022 12:17 PM, Zhou, YidingX wrote:
> >
> >
> >> -----Original Message-----
> >> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> >> Sent: Thursday, August 25, 2022 6:09 PM
> >> To: Zhou, YidingX <yidingx.zhou@intel.com>; dev@dpdk.org
> >> Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; stable@dpdk.org
> >> Subject: Re: [PATCH] net/pcap: reduce time for stopping device
> >>
> >> On 8/25/2022 8:20 AM, Yiding Zhou wrote:
> >>> The pcap file will be synchronized to the disk when stopping the device.
> >>> It takes a long time if the file is large that would cause the
> >>> 'detach sync request' timeout when the device is closed under
> >>> multi-process scenario.
> >>>
> >>> This commit fixes the issue by performing synchronization in Tx path
> >>>
> >>> Fixes: 4c173302c307 ("pcap: add new driver")
> >>> Cc: stable@dpdk.org
> >>>
> >>> Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
> >>> ---
> >>>    drivers/net/pcap/pcap_ethdev.c | 18 ++++++++++++++++--
> >>>    1 file changed, 16 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/drivers/net/pcap/pcap_ethdev.c
> >>> b/drivers/net/pcap/pcap_ethdev.c index ec29fd6bc5..52eafa5674 100644
> >>> --- a/drivers/net/pcap/pcap_ethdev.c
> >>> +++ b/drivers/net/pcap/pcap_ethdev.c
> >>> @@ -3,7 +3,7 @@
> >>>     * Copyright(c) 2014 6WIND S.A.
> >>>     * All rights reserved.
> >>>     */
> >>> -
> >>> +#include <unistd.h>
> >>>    #include <time.h>
> >>>
> >>>    #include <pcap.h>
> >>> @@ -38,6 +38,8 @@
> >>>
> >>>    #define RTE_PMD_PCAP_MAX_QUEUES 16
> >>>
> >>> +#define ETH_PCAP_SYNC_THRESHOLD 0x20000000
> >>> +
> 
> I guess this is 512MB, can you please comment this.
> Is there any specific reason to select this value, or is it arbitrary?
> 
> 

512M is arbitrary, because there is no API to get the disk cache size associated with a specific file.
I will test the performance impact of different values.

> >>>    static char errbuf[PCAP_ERRBUF_SIZE];
> >>>    static struct timespec start_time;
> >>>    static uint64_t start_cycles;
> >>> @@ -47,6 +49,8 @@ static uint8_t iface_idx;
> >>>    static uint64_t timestamp_rx_dynflag;
> >>>    static int timestamp_dynfield_offset = -1;
> >>>
> >>> +RTE_DEFINE_PER_LCORE(uint64_t, _pcap_cached_bytes);
> >>> +
> >>>    struct queue_stat {
> >>>    	volatile unsigned long pkts;
> >>>    	volatile unsigned long bytes;
> >>> @@ -144,6 +148,16 @@ static struct rte_eth_link pmd_link = {
> >>>
> >>>    RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
> >>>
> >>> +static inline void
> >>> +pcap_dumper_data_sync(pcap_dumper_t *dumper, uint32_t bytes) {
> 
> 'pcap_' is the namespace for the libpcap, can you select another prefix, like
> 'eth_pcap_' as many driver functions does.
> 
> >>> +	RTE_PER_LCORE(_pcap_cached_bytes) += bytes;
> >>> +	if (unlikely(RTE_PER_LCORE(_pcap_cached_bytes) >
> >> ETH_PCAP_SYNC_THRESHOLD)) {
> >>> +		if (!fdatasync(fileno(pcap_dump_file(dumper))))
> >>> +			RTE_PER_LCORE(_pcap_cached_bytes) = 0;
> >>> +	}
> >>> +}
> >>> +
> 
> pcap supports multiple queue, and each queue creates a new pcap dumper and
> single core/thread can be used for this multiple dumpers. In that case I think
> above per lcore variable logic doesn't work.
> 
> And instead of having a global value, what do you think to add a variable to
> 'struct pcap_tx_queue' for this purpose?
> 

Thanks for the comments, I will follow this.

> >>>    static struct queue_missed_stat*
> >>>    queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
> >>>    {
> >>> @@ -421,7 +435,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf
> >> **bufs, uint16_t nb_pkts)
> >>>    	 * process stops and to make sure the pcap file is actually written,
> >>>    	 * we flush the pcap dumper within each burst.
> >>>    	 */
> >>> -	pcap_dump_flush(dumper);
> >>> +	pcap_dumper_data_sync(dumper, tx_bytes);
> >>
> >> 'pcap_dump_flush()' should be doing the same thing, to write buffer
> >> to file, isn't it working?
> >>
> >> Can you check the return value of the 'pcap_dump_flush()' API, I
> >> wonder if it keeps failing, for some reason?
> >>
> >
> > 'pcap_dump_flush()' returns 0 each time without error, it calls 'fflush()' to
> flush userspace buffers to kernel buffers, not disk. 'fdatasync()' to ensure data
> is written to disk.
> >
> 
> 'pcap_dump_flush()' API documentation says "flushes the output buffer to the
> ``savefile,''", but as you said it uses 'fflush()' internally, so there is a chance that
> data is not written to the disk.
> 
> In this case, won't need to keep both, first flush and later fsync/fdatasync?
> 

I draw a diagram to describe it more clearly

 fwrite                             fclose/fflush                                                   fclose/fdatasync
--------->| libc buffer  |----------------> |    disk cache in RAM          |---------------------> |disk|
              | 4096 Bytes |                         | size is determined by OS |                                |       |

When the libc buffer is full, the system will automatically sync it to the disk cache.
It is easily full as it's only 4096 B size. so there is no need to call 'fflush()' every time.
The real time consuming action is syncing the disk cache to disk.
Because the disk cache is very large, it will take a long time to sync all at one time during 'fclose()',
so need to call 'fdatasync()' periodically to amortize the time.

> Do you observe any performance difference after change, since now writing to
> actual disk on datapath?
> 

I will verify any performance difference under these 2 scenarios.

> >>>    	dumper_q->tx_stat.pkts += num_tx;
> >>>    	dumper_q->tx_stat.bytes += tx_bytes;
> >>>    	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
> >


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] net/pcap: reduce time for stopping device
  2022-08-25 11:17   ` Zhou, YidingX
@ 2022-08-25 12:21     ` Ferruh Yigit
  2022-08-29 11:50       ` Zhou, YidingX
  0 siblings, 1 reply; 8+ messages in thread
From: Ferruh Yigit @ 2022-08-25 12:21 UTC (permalink / raw)
  To: Zhou, YidingX, dev; +Cc: Zhang, Qi Z, stable

On 8/25/2022 12:17 PM, Zhou, YidingX wrote:
> 
> 
>> -----Original Message-----
>> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
>> Sent: Thursday, August 25, 2022 6:09 PM
>> To: Zhou, YidingX <yidingx.zhou@intel.com>; dev@dpdk.org
>> Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; stable@dpdk.org
>> Subject: Re: [PATCH] net/pcap: reduce time for stopping device
>>
>> On 8/25/2022 8:20 AM, Yiding Zhou wrote:
>>> The pcap file will be synchronized to the disk when stopping the device.
>>> It takes a long time if the file is large that would cause the 'detach
>>> sync request' timeout when the device is closed under multi-process
>>> scenario.
>>>
>>> This commit fixes the issue by performing synchronization in Tx path
>>>
>>> Fixes: 4c173302c307 ("pcap: add new driver")
>>> Cc: stable@dpdk.org
>>>
>>> Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
>>> ---
>>>    drivers/net/pcap/pcap_ethdev.c | 18 ++++++++++++++++--
>>>    1 file changed, 16 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/net/pcap/pcap_ethdev.c
>>> b/drivers/net/pcap/pcap_ethdev.c index ec29fd6bc5..52eafa5674 100644
>>> --- a/drivers/net/pcap/pcap_ethdev.c
>>> +++ b/drivers/net/pcap/pcap_ethdev.c
>>> @@ -3,7 +3,7 @@
>>>     * Copyright(c) 2014 6WIND S.A.
>>>     * All rights reserved.
>>>     */
>>> -
>>> +#include <unistd.h>
>>>    #include <time.h>
>>>
>>>    #include <pcap.h>
>>> @@ -38,6 +38,8 @@
>>>
>>>    #define RTE_PMD_PCAP_MAX_QUEUES 16
>>>
>>> +#define ETH_PCAP_SYNC_THRESHOLD 0x20000000
>>> +

I guess this is 512MB, can you please comment this.
Is there any specific reason to select this value, or is it arbitrary?


>>>    static char errbuf[PCAP_ERRBUF_SIZE];
>>>    static struct timespec start_time;
>>>    static uint64_t start_cycles;
>>> @@ -47,6 +49,8 @@ static uint8_t iface_idx;
>>>    static uint64_t timestamp_rx_dynflag;
>>>    static int timestamp_dynfield_offset = -1;
>>>
>>> +RTE_DEFINE_PER_LCORE(uint64_t, _pcap_cached_bytes);
>>> +
>>>    struct queue_stat {
>>>    	volatile unsigned long pkts;
>>>    	volatile unsigned long bytes;
>>> @@ -144,6 +148,16 @@ static struct rte_eth_link pmd_link = {
>>>
>>>    RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
>>>
>>> +static inline void
>>> +pcap_dumper_data_sync(pcap_dumper_t *dumper, uint32_t bytes) {

'pcap_' is the namespace for the libpcap, can you select another prefix, 
like 'eth_pcap_' as many driver functions does.

>>> +	RTE_PER_LCORE(_pcap_cached_bytes) += bytes;
>>> +	if (unlikely(RTE_PER_LCORE(_pcap_cached_bytes) >
>> ETH_PCAP_SYNC_THRESHOLD)) {
>>> +		if (!fdatasync(fileno(pcap_dump_file(dumper))))
>>> +			RTE_PER_LCORE(_pcap_cached_bytes) = 0;
>>> +	}
>>> +}
>>> +

pcap supports multiple queue, and each queue creates a new pcap dumper 
and single core/thread can be used for this multiple dumpers. In that 
case I think above per lcore variable logic doesn't work.

And instead of having a global value, what do you think to add a 
variable to 'struct pcap_tx_queue' for this purpose?

>>>    static struct queue_missed_stat*
>>>    queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
>>>    {
>>> @@ -421,7 +435,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf
>> **bufs, uint16_t nb_pkts)
>>>    	 * process stops and to make sure the pcap file is actually written,
>>>    	 * we flush the pcap dumper within each burst.
>>>    	 */
>>> -	pcap_dump_flush(dumper);
>>> +	pcap_dumper_data_sync(dumper, tx_bytes);
>>
>> 'pcap_dump_flush()' should be doing the same thing, to write buffer to file,
>> isn't it working?
>>
>> Can you check the return value of the 'pcap_dump_flush()' API, I wonder if it
>> keeps failing, for some reason?
>>
> 
> 'pcap_dump_flush()' returns 0 each time without error, it calls 'fflush()' to flush userspace buffers to kernel buffers, not disk. 'fdatasync()' to ensure data is written to disk.
> 

'pcap_dump_flush()' API documentation says "flushes the output buffer to 
the ``savefile,''", but as you said it uses 'fflush()' internally, so 
there is a chance that data is not written to the disk.

In this case, won't need to keep both, first flush and later 
fsync/fdatasync?

Do you observe any performance difference after change, since now 
writing to actual disk on datapath?

>>>    	dumper_q->tx_stat.pkts += num_tx;
>>>    	dumper_q->tx_stat.bytes += tx_bytes;
>>>    	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] net/pcap: reduce time for stopping device
  2022-08-25 10:09 ` Ferruh Yigit
@ 2022-08-25 11:17   ` Zhou, YidingX
  2022-08-25 12:21     ` Ferruh Yigit
  0 siblings, 1 reply; 8+ messages in thread
From: Zhou, YidingX @ 2022-08-25 11:17 UTC (permalink / raw)
  To: Ferruh Yigit, dev; +Cc: Zhang, Qi Z, stable



> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> Sent: Thursday, August 25, 2022 6:09 PM
> To: Zhou, YidingX <yidingx.zhou@intel.com>; dev@dpdk.org
> Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; stable@dpdk.org
> Subject: Re: [PATCH] net/pcap: reduce time for stopping device
> 
> On 8/25/2022 8:20 AM, Yiding Zhou wrote:
> > The pcap file will be synchronized to the disk when stopping the device.
> > It takes a long time if the file is large that would cause the 'detach
> > sync request' timeout when the device is closed under multi-process
> > scenario.
> >
> > This commit fixes the issue by performing synchronization in Tx path
> >
> > Fixes: 4c173302c307 ("pcap: add new driver")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
> > ---
> >   drivers/net/pcap/pcap_ethdev.c | 18 ++++++++++++++++--
> >   1 file changed, 16 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/pcap/pcap_ethdev.c
> > b/drivers/net/pcap/pcap_ethdev.c index ec29fd6bc5..52eafa5674 100644
> > --- a/drivers/net/pcap/pcap_ethdev.c
> > +++ b/drivers/net/pcap/pcap_ethdev.c
> > @@ -3,7 +3,7 @@
> >    * Copyright(c) 2014 6WIND S.A.
> >    * All rights reserved.
> >    */
> > -
> > +#include <unistd.h>
> >   #include <time.h>
> >
> >   #include <pcap.h>
> > @@ -38,6 +38,8 @@
> >
> >   #define RTE_PMD_PCAP_MAX_QUEUES 16
> >
> > +#define ETH_PCAP_SYNC_THRESHOLD 0x20000000
> > +
> >   static char errbuf[PCAP_ERRBUF_SIZE];
> >   static struct timespec start_time;
> >   static uint64_t start_cycles;
> > @@ -47,6 +49,8 @@ static uint8_t iface_idx;
> >   static uint64_t timestamp_rx_dynflag;
> >   static int timestamp_dynfield_offset = -1;
> >
> > +RTE_DEFINE_PER_LCORE(uint64_t, _pcap_cached_bytes);
> > +
> >   struct queue_stat {
> >   	volatile unsigned long pkts;
> >   	volatile unsigned long bytes;
> > @@ -144,6 +148,16 @@ static struct rte_eth_link pmd_link = {
> >
> >   RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
> >
> > +static inline void
> > +pcap_dumper_data_sync(pcap_dumper_t *dumper, uint32_t bytes) {
> > +	RTE_PER_LCORE(_pcap_cached_bytes) += bytes;
> > +	if (unlikely(RTE_PER_LCORE(_pcap_cached_bytes) >
> ETH_PCAP_SYNC_THRESHOLD)) {
> > +		if (!fdatasync(fileno(pcap_dump_file(dumper))))
> > +			RTE_PER_LCORE(_pcap_cached_bytes) = 0;
> > +	}
> > +}
> > +
> >   static struct queue_missed_stat*
> >   queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
> >   {
> > @@ -421,7 +435,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf
> **bufs, uint16_t nb_pkts)
> >   	 * process stops and to make sure the pcap file is actually written,
> >   	 * we flush the pcap dumper within each burst.
> >   	 */
> > -	pcap_dump_flush(dumper);
> > +	pcap_dumper_data_sync(dumper, tx_bytes);
> 
> 'pcap_dump_flush()' should be doing the same thing, to write buffer to file,
> isn't it working?
> 
> Can you check the return value of the 'pcap_dump_flush()' API, I wonder if it
> keeps failing, for some reason?
> 

'pcap_dump_flush()' returns 0 each time without error, it calls 'fflush()' to flush userspace buffers to kernel buffers, not disk. 'fdatasync()' to ensure data is written to disk.

> >   	dumper_q->tx_stat.pkts += num_tx;
> >   	dumper_q->tx_stat.bytes += tx_bytes;
> >   	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] net/pcap: reduce time for stopping device
  2022-08-25  7:20 Yiding Zhou
@ 2022-08-25 10:09 ` Ferruh Yigit
  2022-08-25 11:17   ` Zhou, YidingX
  0 siblings, 1 reply; 8+ messages in thread
From: Ferruh Yigit @ 2022-08-25 10:09 UTC (permalink / raw)
  To: Yiding Zhou, dev; +Cc: qi.z.zhang, stable

On 8/25/2022 8:20 AM, Yiding Zhou wrote:
> The pcap file will be synchronized to the disk when stopping the device.
> It takes a long time if the file is large that would cause the
> 'detach sync request' timeout when the device is closed under multi-process
> scenario.
> 
> This commit fixes the issue by performing synchronization in Tx path
> 
> Fixes: 4c173302c307 ("pcap: add new driver")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
> ---
>   drivers/net/pcap/pcap_ethdev.c | 18 ++++++++++++++++--
>   1 file changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
> index ec29fd6bc5..52eafa5674 100644
> --- a/drivers/net/pcap/pcap_ethdev.c
> +++ b/drivers/net/pcap/pcap_ethdev.c
> @@ -3,7 +3,7 @@
>    * Copyright(c) 2014 6WIND S.A.
>    * All rights reserved.
>    */
> -
> +#include <unistd.h>
>   #include <time.h>
>   
>   #include <pcap.h>
> @@ -38,6 +38,8 @@
>   
>   #define RTE_PMD_PCAP_MAX_QUEUES 16
>   
> +#define ETH_PCAP_SYNC_THRESHOLD 0x20000000
> +
>   static char errbuf[PCAP_ERRBUF_SIZE];
>   static struct timespec start_time;
>   static uint64_t start_cycles;
> @@ -47,6 +49,8 @@ static uint8_t iface_idx;
>   static uint64_t timestamp_rx_dynflag;
>   static int timestamp_dynfield_offset = -1;
>   
> +RTE_DEFINE_PER_LCORE(uint64_t, _pcap_cached_bytes);
> +
>   struct queue_stat {
>   	volatile unsigned long pkts;
>   	volatile unsigned long bytes;
> @@ -144,6 +148,16 @@ static struct rte_eth_link pmd_link = {
>   
>   RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
>   
> +static inline void
> +pcap_dumper_data_sync(pcap_dumper_t *dumper, uint32_t bytes)
> +{
> +	RTE_PER_LCORE(_pcap_cached_bytes) += bytes;
> +	if (unlikely(RTE_PER_LCORE(_pcap_cached_bytes) > ETH_PCAP_SYNC_THRESHOLD)) {
> +		if (!fdatasync(fileno(pcap_dump_file(dumper))))
> +			RTE_PER_LCORE(_pcap_cached_bytes) = 0;
> +	}
> +}
> +
>   static struct queue_missed_stat*
>   queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
>   {
> @@ -421,7 +435,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   	 * process stops and to make sure the pcap file is actually written,
>   	 * we flush the pcap dumper within each burst.
>   	 */
> -	pcap_dump_flush(dumper);
> +	pcap_dumper_data_sync(dumper, tx_bytes);

'pcap_dump_flush()' should be doing the same thing, to write buffer to 
file, isn't it working?

Can you check the return value of the 'pcap_dump_flush()' API, I wonder 
if it keeps failing, for some reason?

>   	dumper_q->tx_stat.pkts += num_tx;
>   	dumper_q->tx_stat.bytes += tx_bytes;
>   	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] net/pcap: reduce time for stopping device
@ 2022-08-25  7:20 Yiding Zhou
  2022-08-25 10:09 ` Ferruh Yigit
  0 siblings, 1 reply; 8+ messages in thread
From: Yiding Zhou @ 2022-08-25  7:20 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, stable, Yiding Zhou

The pcap file will be synchronized to the disk when stopping the device.
It takes a long time if the file is large that would cause the
'detach sync request' timeout when the device is closed under multi-process
scenario.

This commit fixes the issue by performing synchronization in Tx path

Fixes: 4c173302c307 ("pcap: add new driver")
Cc: stable@dpdk.org

Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
---
 drivers/net/pcap/pcap_ethdev.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index ec29fd6bc5..52eafa5674 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -3,7 +3,7 @@
  * Copyright(c) 2014 6WIND S.A.
  * All rights reserved.
  */
-
+#include <unistd.h>
 #include <time.h>
 
 #include <pcap.h>
@@ -38,6 +38,8 @@
 
 #define RTE_PMD_PCAP_MAX_QUEUES 16
 
+#define ETH_PCAP_SYNC_THRESHOLD 0x20000000
+
 static char errbuf[PCAP_ERRBUF_SIZE];
 static struct timespec start_time;
 static uint64_t start_cycles;
@@ -47,6 +49,8 @@ static uint8_t iface_idx;
 static uint64_t timestamp_rx_dynflag;
 static int timestamp_dynfield_offset = -1;
 
+RTE_DEFINE_PER_LCORE(uint64_t, _pcap_cached_bytes);
+
 struct queue_stat {
 	volatile unsigned long pkts;
 	volatile unsigned long bytes;
@@ -144,6 +148,16 @@ static struct rte_eth_link pmd_link = {
 
 RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
 
+static inline void
+pcap_dumper_data_sync(pcap_dumper_t *dumper, uint32_t bytes)
+{
+	RTE_PER_LCORE(_pcap_cached_bytes) += bytes;
+	if (unlikely(RTE_PER_LCORE(_pcap_cached_bytes) > ETH_PCAP_SYNC_THRESHOLD)) {
+		if (!fdatasync(fileno(pcap_dump_file(dumper))))
+			RTE_PER_LCORE(_pcap_cached_bytes) = 0;
+	}
+}
+
 static struct queue_missed_stat*
 queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
 {
@@ -421,7 +435,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	 * process stops and to make sure the pcap file is actually written,
 	 * we flush the pcap dumper within each burst.
 	 */
-	pcap_dump_flush(dumper);
+	pcap_dumper_data_sync(dumper, tx_bytes);
 	dumper_q->tx_stat.pkts += num_tx;
 	dumper_q->tx_stat.bytes += tx_bytes;
 	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-09-01  7:41 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-25  6:27 [PATCH] net/pcap: reduce time for stopping device Yiding Zhou
2022-08-25  7:20 Yiding Zhou
2022-08-25 10:09 ` Ferruh Yigit
2022-08-25 11:17   ` Zhou, YidingX
2022-08-25 12:21     ` Ferruh Yigit
2022-08-29 11:50       ` Zhou, YidingX
2022-08-31 16:42         ` Stephen Hemminger
2022-09-01  7:40         ` Zhou, YidingX

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).