* [dpdk-dev] [PATCH 2/2] ixgbe:replace compiler memory barrier and rte_wmb with rte_dma_rmb and rte_dma_wmb.
[not found] <1435504998-15566-1-git-send-email-dong.wang.pro@hotmail.com>
@ 2015-06-28 15:23 ` WangDong
2015-07-02 16:19 ` Ananyev, Konstantin
0 siblings, 1 reply; 2+ messages in thread
From: WangDong @ 2015-06-28 15:23 UTC (permalink / raw)
To: dev
---
drivers/net/ixgbe/ixgbe_rxtx.c | 30 +++++++++---------------------
drivers/net/ixgbe/ixgbe_rxtx_vec.c | 3 +++
2 files changed, 12 insertions(+), 21 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 3ace8a8..3316488 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -130,6 +130,7 @@ ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
/* check DD bit on threshold descriptor */
status = txq->tx_ring[txq->tx_next_dd].wb.status;
+ rte_dma_rmb();
if (! (status & IXGBE_ADVTXD_STAT_DD))
return 0;
@@ -320,7 +321,7 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->tx_tail = 0;
/* update tail pointer */
- rte_wmb();
+ rte_dma_wmb();
IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
return nb_pkts;
@@ -841,7 +842,6 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
}
end_of_tx:
- rte_wmb();
/*
* Set the Transmit Descriptor Tail (TDT)
@@ -849,6 +849,7 @@ end_of_tx:
PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
(unsigned) txq->port_id, (unsigned) txq->queue_id,
(unsigned) tx_id, (unsigned) nb_tx);
+ rte_dma_wmb();
IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
txq->tx_tail = tx_id;
@@ -975,6 +976,7 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
/* Compute how many status bits were set */
nb_dd = 0;
+ rte_dma_rmb();
for (j = 0; j < LOOK_AHEAD; ++j)
nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
@@ -1138,7 +1140,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
}
/* update tail pointer */
- rte_wmb();
+ rte_dma_wmb();
IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
}
@@ -1229,13 +1231,10 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
/*
* The order of operations here is important as the DD status
* bit must not be read after any other descriptor fields.
- * rx_ring and rxdp are pointing to volatile data so the order
- * of accesses cannot be reordered by the compiler. If they were
- * not volatile, they could be reordered which could lead to
- * using invalid descriptor fields when read from rxd.
*/
rxdp = &rx_ring[rx_id];
staterr = rxdp->wb.upper.status_error;
+ rte_dma_rmb();
if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
break;
rxd = *rxdp;
@@ -1373,6 +1372,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
(unsigned) nb_rx);
rx_id = (uint16_t) ((rx_id == 0) ?
(rxq->nb_rx_desc - 1) : (rx_id - 1));
+ rte_dma_wmb();
IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
nb_hold = 0;
}
@@ -1494,17 +1494,6 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
next_desc:
/*
- * The code in this whole file uses the volatile pointer to
- * ensure the read ordering of the status and the rest of the
- * descriptor fields (on the compiler level only!!!). This is so
- * UGLY - why not to just use the compiler barrier instead? DPDK
- * even has the rte_compiler_barrier() for that.
- *
- * But most importantly this is just wrong because this doesn't
- * ensure memory ordering in a general case at all. For
- * instance, DPDK is supposed to work on Power CPUs where
- * compiler barrier may just not be enough!
- *
* I tried to write only this function properly to have a
* starting point (as a part of an LRO/RSC series) but the
* compiler cursed at me when I tried to cast away the
@@ -1519,12 +1508,11 @@ next_desc:
* TODO:
* - Get rid of "volatile" crap and let the compiler do its
* job.
- * - Use the proper memory barrier (rte_rmb()) to ensure the
- * memory ordering below.
*/
rxdp = &rx_ring[rx_id];
staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
+ rte_dma_rmb();
if (!(staterr & IXGBE_RXDADV_STAT_DD))
break;
@@ -1704,7 +1692,7 @@ next_desc:
"nb_hold=%u nb_rx=%u",
rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
- rte_wmb();
+ rte_dma_wmb();
IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
nb_hold = 0;
}
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
index abd10f6..af4d779 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
@@ -123,6 +123,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
(rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
/* Update the tail pointer on the NIC */
+ rte_dma_wmb();
IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
}
@@ -528,6 +529,7 @@ ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
/* check DD bit on threshold descriptor */
status = txq->tx_ring[txq->tx_next_dd].wb.status;
+ rte_dma_rmb();
if (!(status & IXGBE_ADVTXD_STAT_DD))
return 0;
@@ -645,6 +647,7 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->tx_tail = tx_id;
+ rte_dma_wmb();
IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
return nb_pkts;
--
2.1.0
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [dpdk-dev] [PATCH 2/2] ixgbe:replace compiler memory barrier and rte_wmb with rte_dma_rmb and rte_dma_wmb.
2015-06-28 15:23 ` [dpdk-dev] [PATCH 2/2] ixgbe:replace compiler memory barrier and rte_wmb with rte_dma_rmb and rte_dma_wmb WangDong
@ 2015-07-02 16:19 ` Ananyev, Konstantin
0 siblings, 0 replies; 2+ messages in thread
From: Ananyev, Konstantin @ 2015-07-02 16:19 UTC (permalink / raw)
To: WangDong, dev
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of WangDong
> Sent: Sunday, June 28, 2015 4:23 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH 2/2] ixgbe:replace compiler memory barrier and rte_wmb with rte_dma_rmb and rte_dma_wmb.
>
> ---
> drivers/net/ixgbe/ixgbe_rxtx.c | 30 +++++++++---------------------
> drivers/net/ixgbe/ixgbe_rxtx_vec.c | 3 +++
> 2 files changed, 12 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 3ace8a8..3316488 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -130,6 +130,7 @@ ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
>
> /* check DD bit on threshold descriptor */
> status = txq->tx_ring[txq->tx_next_dd].wb.status;
> + rte_dma_rmb();
> if (! (status & IXGBE_ADVTXD_STAT_DD))
> return 0;
Could you explain why we need an rmb here for the weak ordering model?
We don't read the rest of the TXD later, so nothing could be reordered here.
>
> @@ -320,7 +321,7 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> txq->tx_tail = 0;
>
> /* update tail pointer */
> - rte_wmb();
> + rte_dma_wmb();
> IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
>
> return nb_pkts;
> @@ -841,7 +842,6 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
> }
> end_of_tx:
> - rte_wmb();
>
> /*
> * Set the Transmit Descriptor Tail (TDT)
> @@ -849,6 +849,7 @@ end_of_tx:
> PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
> (unsigned) txq->port_id, (unsigned) txq->queue_id,
> (unsigned) tx_id, (unsigned) nb_tx);
> + rte_dma_wmb();
> IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
> txq->tx_tail = tx_id;
>
> @@ -975,6 +976,7 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
>
> /* Compute how many status bits were set */
> nb_dd = 0;
> + rte_dma_rmb();
I think that's a bit too late for an rmb() here.
We need to preserve the order of reading all 8 statuses, so I am afraid we need to:
/* Read desc statuses backwards to avoid race condition */
-for (j = LOOK_AHEAD-1; j >= 0; --j)
+for (j = LOOK_AHEAD-1; j >= 0; --j) {
+ rte_dma_wmb();
s[j] = rxdp[j].wb.upper.status_error;
+}
> for (j = 0; j < LOOK_AHEAD; ++j)
> nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
>
> @@ -1138,7 +1140,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
> }
>
> /* update tail pointer */
> - rte_wmb();
> + rte_dma_wmb();
> IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
> }
>
> @@ -1229,13 +1231,10 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
> /*
> * The order of operations here is important as the DD status
> * bit must not be read after any other descriptor fields.
> - * rx_ring and rxdp are pointing to volatile data so the order
> - * of accesses cannot be reordered by the compiler. If they were
> - * not volatile, they could be reordered which could lead to
> - * using invalid descriptor fields when read from rxd.
> */
> rxdp = &rx_ring[rx_id];
> staterr = rxdp->wb.upper.status_error;
> + rte_dma_rmb();
> if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
> break;
> rxd = *rxdp;
> @@ -1373,6 +1372,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
> (unsigned) nb_rx);
> rx_id = (uint16_t) ((rx_id == 0) ?
> (rxq->nb_rx_desc - 1) : (rx_id - 1));
> + rte_dma_wmb();
> IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
> nb_hold = 0;
> }
> @@ -1494,17 +1494,6 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
>
> next_desc:
> /*
> - * The code in this whole file uses the volatile pointer to
> - * ensure the read ordering of the status and the rest of the
> - * descriptor fields (on the compiler level only!!!). This is so
> - * UGLY - why not to just use the compiler barrier instead? DPDK
> - * even has the rte_compiler_barrier() for that.
> - *
> - * But most importantly this is just wrong because this doesn't
> - * ensure memory ordering in a general case at all. For
> - * instance, DPDK is supposed to work on Power CPUs where
> - * compiler barrier may just not be enough!
> - *
> * I tried to write only this function properly to have a
> * starting point (as a part of an LRO/RSC series) but the
> * compiler cursed at me when I tried to cast away the
> @@ -1519,12 +1508,11 @@ next_desc:
> * TODO:
> * - Get rid of "volatile" crap and let the compiler do its
> * job.
> - * - Use the proper memory barrier (rte_rmb()) to ensure the
> - * memory ordering below.
> */
> rxdp = &rx_ring[rx_id];
> staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
>
> + rte_dma_rmb();
> if (!(staterr & IXGBE_RXDADV_STAT_DD))
> break;
>
> @@ -1704,7 +1692,7 @@ next_desc:
> "nb_hold=%u nb_rx=%u",
> rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
>
> - rte_wmb();
> + rte_dma_wmb();
> IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
> nb_hold = 0;
> }
I think you missed one more wmb() in that function:
ixgbe_recv_pkts_lro(...)
{
...
} else if (nb_hold > rxq->rx_free_thresh) {
uint16_t next_rdt = rxq->rx_free_trigger;
if (!ixgbe_rx_alloc_bufs(rxq, false)) {
rte_wmb();
IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
next_rdt);
nb_hold -= rxq->rx_free_thresh;
} else {
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> index abd10f6..af4d779 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
In fact, I think there is not much point in modifying that one.
Vector routines use IA-specific intrinsics, so that code wouldn't work on any other architecture anyway.
> @@ -123,6 +123,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
> (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
>
> /* Update the tail pointer on the NIC */
> + rte_dma_wmb();
> IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
> }
>
> @@ -528,6 +529,7 @@ ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
>
> /* check DD bit on threshold descriptor */
> status = txq->tx_ring[txq->tx_next_dd].wb.status;
> + rte_dma_rmb();
> if (!(status & IXGBE_ADVTXD_STAT_DD))
> return 0;
Again, as with its scalar counterpart, I don't think we need an rmb here.
We read only the status from one TXD, and that's it.
But as I said above, there is probably no need to touch that file at all.
Konstantin
>
> @@ -645,6 +647,7 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>
> txq->tx_tail = tx_id;
>
> + rte_dma_wmb();
> IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
>
> return nb_pkts;
> --
> 2.1.0
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2015-07-02 16:20 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <1435504998-15566-1-git-send-email-dong.wang.pro@hotmail.com>
2015-06-28 15:23 ` [dpdk-dev] [PATCH 2/2] ixgbe:replace compiler memory barrier and rte_wmb with rte_dma_rmb and rte_dma_wmb WangDong
2015-07-02 16:19 ` Ananyev, Konstantin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).