* [dpdk-dev] [PATCH v2 1/6] ixgbe: Cleanups
2015-03-05 11:27 [dpdk-dev] [PATCH v2 0/6]: Add LRO support to ixgbe PMD Vlad Zolotarov
@ 2015-03-05 11:28 ` Vlad Zolotarov
2015-03-05 11:28 ` [dpdk-dev] [PATCH v2 2/6] ixgbe: Bug fix: Properly configure Rx CRC stripping for x540 devices Vlad Zolotarov
` (4 subsequent siblings)
5 siblings, 0 replies; 13+ messages in thread
From: Vlad Zolotarov @ 2015-03-05 11:28 UTC (permalink / raw)
To: dev
- Removed unneeded casts.
- Use rte_le_to_cpu_xx()/rte_cpu_to_le_xx() when reading/setting HW ring
  descriptor fields. A few fields were accessed/written directly, which would
  break on big-endian platforms such as PowerPC (see the sketch below).
- ixgbe_dev_rx_init(): shorten the lines by defining a local alias variable
  to access &dev->data->dev_conf.rxmode.
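A minimal sketch of the access pattern in question (not part of the patch;
the descriptor struct below is hypothetical and stands in for the HW-defined
ixgbe_adv_rx_desc):

    #include <stdint.h>
    #include <rte_byteorder.h>

    /* Hypothetical little-endian HW descriptor field, for illustration. */
    struct demo_rx_desc {
        uint32_t rss; /* written by the NIC in little-endian byte order */
    };

    static inline uint32_t demo_read_rss(const volatile struct demo_rx_desc *d)
    {
        /* Wrong on big-endian CPUs: return d->rss; */
        return rte_le_to_cpu_32(d->rss); /* no-op on x86, byte swap on PPC */
    }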
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
---
lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 52 +++++++++++++++++++++------------------
1 file changed, 28 insertions(+), 24 deletions(-)
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 3059375..6c0e466 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -1028,12 +1028,11 @@ ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq)
struct igb_rx_entry *rxep;
struct rte_mbuf *mb;
uint16_t alloc_idx;
- uint64_t dma_addr;
+ __le64 dma_addr;
int diag, i;
/* allocate buffers in bulk directly into the S/W ring */
- alloc_idx = (uint16_t)(rxq->rx_free_trigger -
- (rxq->rx_free_thresh - 1));
+ alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
rxep = &rxq->sw_ring[alloc_idx];
diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
rxq->rx_free_thresh);
@@ -1051,7 +1050,7 @@ ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq)
mb->port = rxq->port_id;
/* populate the descriptors */
- dma_addr = (uint64_t)mb->buf_physaddr + RTE_PKTMBUF_HEADROOM;
+ dma_addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb));
rxdp[i].read.hdr_addr = dma_addr;
rxdp[i].read.pkt_addr = dma_addr;
}
@@ -1061,10 +1060,9 @@ ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq)
IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rxq->rx_free_trigger);
/* update state of internal queue structure */
- rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_trigger +
- rxq->rx_free_thresh);
+ rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
- rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
+ rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
/* no errors */
return 0;
@@ -1559,13 +1557,14 @@ ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
first_seg->ol_flags = pkt_flags;
if (likely(pkt_flags & PKT_RX_RSS_HASH))
- first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
+ first_seg->hash.rss =
+ rte_le_to_cpu_32(rxd.wb.lower.hi_dword.rss);
else if (pkt_flags & PKT_RX_FDIR) {
first_seg->hash.fdir.hash =
- (uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
- & IXGBE_ATR_HASH_MASK);
+ rte_le_to_cpu_16(rxd.wb.lower.hi_dword.csum_ip.csum)
+ & IXGBE_ATR_HASH_MASK;
first_seg->hash.fdir.id =
- rxd.wb.lower.hi_dword.csum_ip.ip_id;
+ rte_le_to_cpu_16(rxd.wb.lower.hi_dword.csum_ip.ip_id);
}
/* Prefetch data of first segment, if configured to do so. */
@@ -2248,6 +2247,12 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
#ifdef RTE_IXGBE_INC_VECTOR
ixgbe_rxq_vec_setup(rxq);
#endif
+ /*
+ * TODO: This must be moved to ixgbe_dev_rx_init() since rx_pkt_burst
+ * is a global per-device callback thus bulk allocation may be used
+ * only if all queues meet the above preconditions.
+ */
+
/* Check if pre-conditions are satisfied, and no Scattered Rx */
if (!use_def_burst_func && !dev->data->scattered_rx) {
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
@@ -3523,6 +3528,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
uint32_t rxcsum;
uint16_t buf_size;
uint16_t i;
+ struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
PMD_INIT_FUNC_TRACE();
hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -3545,7 +3551,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
* Configure CRC stripping, if any.
*/
hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
- if (dev->data->dev_conf.rxmode.hw_strip_crc)
+ if (rx_conf->hw_strip_crc)
hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
else
hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
@@ -3553,11 +3559,11 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
/*
* Configure jumbo frame support, if any.
*/
- if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
+ if (rx_conf->jumbo_frame == 1) {
hlreg0 |= IXGBE_HLREG0_JUMBOEN;
maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
maxfrs &= 0x0000FFFF;
- maxfrs |= (dev->data->dev_conf.rxmode.max_rx_pkt_len << 16);
+ maxfrs |= (rx_conf->max_rx_pkt_len << 16);
IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
} else
hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
@@ -3581,9 +3587,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
* Reset crc_len in case it was changed after queue setup by a
* call to configure.
*/
- rxq->crc_len = (uint8_t)
- ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
- ETHER_CRC_LEN);
+ rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
/* Setup the Base and Length of the Rx Descriptor Rings */
bus_addr = rxq->rx_ring_phys_addr;
@@ -3601,7 +3605,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
/*
* Configure Header Split
*/
- if (dev->data->dev_conf.rxmode.header_split) {
+ if (rx_conf->header_split) {
if (hw->mac.type == ixgbe_mac_82599EB) {
/* Must setup the PSRTYPE register */
uint32_t psrtype;
@@ -3611,7 +3615,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
IXGBE_PSRTYPE_IPV6HDR;
IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
}
- srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
+ srrctl = ((rx_conf->split_hdr_size <<
IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
IXGBE_SRRCTL_BSIZEHDR_MASK);
srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
@@ -3640,8 +3644,8 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
IXGBE_SRRCTL_BSIZEPKT_SHIFT);
/* It adds dual VLAN length for supporting dual VLAN */
- if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
- 2 * IXGBE_VLAN_TAG_SIZE) > buf_size){
+ if ((rx_conf->max_rx_pkt_len + 2 * IXGBE_VLAN_TAG_SIZE) >
+ buf_size) {
if (!dev->data->scattered_rx)
PMD_INIT_LOG(DEBUG, "forcing scatter mode");
dev->data->scattered_rx = 1;
@@ -3653,7 +3657,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
}
}
- if (dev->data->dev_conf.rxmode.enable_scatter) {
+ if (rx_conf->enable_scatter) {
if (!dev->data->scattered_rx)
PMD_INIT_LOG(DEBUG, "forcing scatter mode");
#ifdef RTE_IXGBE_INC_VECTOR
@@ -3676,7 +3680,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
*/
rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
rxcsum |= IXGBE_RXCSUM_PCSD;
- if (dev->data->dev_conf.rxmode.hw_ip_checksum)
+ if (rx_conf->hw_ip_checksum)
rxcsum |= IXGBE_RXCSUM_IPPCSE;
else
rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
@@ -3685,7 +3689,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
if (hw->mac.type == ixgbe_mac_82599EB) {
rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
- if (dev->data->dev_conf.rxmode.hw_strip_crc)
+ if (rx_conf->hw_strip_crc)
rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
else
rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
--
2.1.0
^ permalink raw reply [flat|nested] 13+ messages in thread
* [dpdk-dev] [PATCH v2 2/6] ixgbe: Bug fix: Properly configure Rx CRC stripping for x540 devices
2015-03-05 11:27 [dpdk-dev] [PATCH v2 0/6]: Add LRO support to ixgbe PMD Vlad Zolotarov
2015-03-05 11:28 ` [dpdk-dev] [PATCH v2 1/6] ixgbe: Cleanups Vlad Zolotarov
@ 2015-03-05 11:28 ` Vlad Zolotarov
2015-03-05 11:28 ` [dpdk-dev] [PATCH v2 3/6] ixgbe: Code refactoring Vlad Zolotarov
` (3 subsequent siblings)
5 siblings, 0 replies; 13+ messages in thread
From: Vlad Zolotarov @ 2015-03-05 11:28 UTC (permalink / raw)
To: dev
According to the x540 spec, chapter 8.2.4.8.9, the CRCSTRIP field of RDRXCTL
should be configured to the same value as HLREG0.RXCRCSTRP (as sketched below).
Clearing the RDRXCTL.RSCFRSTSIZE field for x540 is not required by the spec
but seems harmless.
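A condensed sketch of the invariant this patch enforces (in the driver the
two register updates live in different parts of ixgbe_dev_rx_init(); variable
names are as in the code):

    /* Both CRC-strip knobs follow the same rxmode flag on 82599/x540. */
    if (rx_conf->hw_strip_crc) {
        hlreg0  |= IXGBE_HLREG0_RXCRCSTRP;
        rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
    } else {
        hlreg0  &= ~IXGBE_HLREG0_RXCRCSTRP;
        rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
    }
    IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
    IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);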
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
---
lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 6c0e466..35a88d8 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3687,7 +3687,8 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
- if (hw->mac.type == ixgbe_mac_82599EB) {
+ if (hw->mac.type == ixgbe_mac_82599EB ||
+ hw->mac.type == ixgbe_mac_X540) {
rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
if (rx_conf->hw_strip_crc)
rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
--
2.1.0
^ permalink raw reply [flat|nested] 13+ messages in thread
* [dpdk-dev] [PATCH v2 3/6] ixgbe: Code refactoring
2015-03-05 11:27 [dpdk-dev] [PATCH v2 0/6]: Add LRO support to ixgbe PMD Vlad Zolotarov
2015-03-05 11:28 ` [dpdk-dev] [PATCH v2 1/6] ixgbe: Cleanups Vlad Zolotarov
2015-03-05 11:28 ` [dpdk-dev] [PATCH v2 2/6] ixgbe: Bug fix: Properly configure Rx CRC stripping for x540 devices Vlad Zolotarov
@ 2015-03-05 11:28 ` Vlad Zolotarov
2015-03-05 11:28 ` [dpdk-dev] [PATCH v2 4/6] ixgbe: Unify the rx_pkt_bulk callback initialization Vlad Zolotarov
` (2 subsequent siblings)
5 siblings, 0 replies; 13+ messages in thread
From: Vlad Zolotarov @ 2015-03-05 11:28 UTC (permalink / raw)
To: dev
- ixgbe_rx_alloc_bufs():
  - Reset the rte_mbuf fields only when requested.
  - Move the RDT update out of the function (see the caller-side sketch
    below).
  - Add a stub for when RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC is not defined.
- ixgbe_recv_scattered_pkts():
  - Move the code that updates the fields of the cluster's HEAD buffer into
    an inline function.
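The caller-side pattern after this refactoring, paraphrased from the diff
below: the tail value has to be captured before ixgbe_rx_alloc_bufs()
advances rxq->rx_free_trigger.

    uint16_t cur_free_trigger = rxq->rx_free_trigger;

    if (ixgbe_rx_alloc_bufs(rxq, true) == 0) {
        /* Make refilled descriptors visible before bumping the tail. */
        rte_wmb();
        IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
    }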
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
---
lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 116 +++++++++++++++++++++++---------------
1 file changed, 70 insertions(+), 46 deletions(-)
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 35a88d8..4c67a9e 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -1022,7 +1022,7 @@ ixgbe_rx_scan_hw_ring(struct igb_rx_queue *rxq)
}
static inline int
-ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq)
+ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq, bool reset_mbuf)
{
volatile union ixgbe_adv_rx_desc *rxdp;
struct igb_rx_entry *rxep;
@@ -1043,11 +1043,14 @@ ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq)
for (i = 0; i < rxq->rx_free_thresh; ++i) {
/* populate the static rte mbuf fields */
mb = rxep[i].mbuf;
- rte_mbuf_refcnt_set(mb, 1);
- mb->next = NULL;
+ if (reset_mbuf) {
+ rte_mbuf_refcnt_set(mb, 1);
+ mb->next = NULL;
+ mb->nb_segs = 1;
+ mb->port = rxq->port_id;
+ }
+
mb->data_off = RTE_PKTMBUF_HEADROOM;
- mb->nb_segs = 1;
- mb->port = rxq->port_id;
/* populate the descriptors */
dma_addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb));
@@ -1055,10 +1058,6 @@ ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq)
rxdp[i].read.pkt_addr = dma_addr;
}
- /* update tail pointer */
- rte_wmb();
- IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rxq->rx_free_trigger);
-
/* update state of internal queue structure */
rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
@@ -1110,7 +1109,9 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
/* if required, allocate new buffers to replenish descriptors */
if (rxq->rx_tail > rxq->rx_free_trigger) {
- if (ixgbe_rx_alloc_bufs(rxq) != 0) {
+ uint16_t cur_free_trigger = rxq->rx_free_trigger;
+
+ if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
int i, j;
PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
"queue_id=%u", (unsigned) rxq->port_id,
@@ -1130,6 +1131,10 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
return 0;
}
+
+ /* update tail pointer */
+ rte_wmb();
+ IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
}
if (rxq->rx_tail >= rxq->nb_rx_desc)
@@ -1169,6 +1174,13 @@ ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
return nb_rx;
}
+#else
+static inline int
+ixgbe_rx_alloc_bufs(__rte_unused struct igb_rx_queue *rxq,
+ __rte_unused bool reset_mbuf)
+{
+ return -ENOMEM;
+}
#endif /* RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC */
uint16_t
@@ -1353,6 +1365,51 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
return (nb_rx);
}
+/**
+ * Initialize the first mbuf of the returned packet:
+ * - RX port identifier,
+ * - hardware offload data, if any:
+ * - RSS flag & hash,
+ * - IP checksum flag,
+ * - VLAN TCI, if any,
+ * - error flags.
+ * @head HEAD of the packet cluster
+ * @desc HW descriptor to get data from
+ * @port_id Port ID of the Rx queue
+ */
+static inline void ixgbe_fill_cluster_head_buf(
+ struct rte_mbuf *head,
+ union ixgbe_adv_rx_desc *desc,
+ uint8_t port_id,
+ uint32_t staterr)
+{
+ uint32_t hlen_type_rss;
+ uint64_t pkt_flags;
+
+ head->port = port_id;
+
+ /*
+ * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
+ * set in the pkt_flags field.
+ */
+ head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
+ hlen_type_rss = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
+ pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
+ pkt_flags |= rx_desc_status_to_pkt_flags(staterr);
+ pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
+ head->ol_flags = pkt_flags;
+
+ if (likely(pkt_flags & PKT_RX_RSS_HASH))
+ head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
+ else if (pkt_flags & PKT_RX_FDIR) {
+ head->hash.fdir.hash =
+ rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
+ & IXGBE_ATR_HASH_MASK;
+ head->hash.fdir.id =
+ rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
+ }
+}
+
uint16_t
ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts)
@@ -1369,12 +1426,10 @@ ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
union ixgbe_adv_rx_desc rxd;
uint64_t dma; /* Physical address of mbuf data buffer */
uint32_t staterr;
- uint32_t hlen_type_rss;
uint16_t rx_id;
uint16_t nb_rx;
uint16_t nb_hold;
uint16_t data_len;
- uint64_t pkt_flags;
nb_rx = 0;
nb_hold = 0;
@@ -1532,40 +1587,9 @@ ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
(uint16_t) (data_len - ETHER_CRC_LEN);
}
- /*
- * Initialize the first mbuf of the returned packet:
- * - RX port identifier,
- * - hardware offload data, if any:
- * - RSS flag & hash,
- * - IP checksum flag,
- * - VLAN TCI, if any,
- * - error flags.
- */
- first_seg->port = rxq->port_id;
-
- /*
- * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
- * set in the pkt_flags field.
- */
- first_seg->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
- hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
- pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
- pkt_flags = (pkt_flags |
- rx_desc_status_to_pkt_flags(staterr));
- pkt_flags = (pkt_flags |
- rx_desc_error_to_pkt_flags(staterr));
- first_seg->ol_flags = pkt_flags;
-
- if (likely(pkt_flags & PKT_RX_RSS_HASH))
- first_seg->hash.rss =
- rte_le_to_cpu_32(rxd.wb.lower.hi_dword.rss);
- else if (pkt_flags & PKT_RX_FDIR) {
- first_seg->hash.fdir.hash =
- rte_le_to_cpu_16(rxd.wb.lower.hi_dword.csum_ip.csum)
- & IXGBE_ATR_HASH_MASK;
- first_seg->hash.fdir.id =
- rte_le_to_cpu_16(rxd.wb.lower.hi_dword.csum_ip.ip_id);
- }
+ /* Initialize the first mbuf of the returned packet */
+ ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq->port_id,
+ staterr);
/* Prefetch data of first segment, if configured to do so. */
rte_packet_prefetch((char *)first_seg->buf_addr +
--
2.1.0
^ permalink raw reply [flat|nested] 13+ messages in thread
* [dpdk-dev] [PATCH v2 4/6] ixgbe: Unify the rx_pkt_bulk callback initialization
2015-03-05 11:27 [dpdk-dev] [PATCH v2 0/6]: Add LRO support to ixgbe PMD Vlad Zolotarov
` (2 preceding siblings ...)
2015-03-05 11:28 ` [dpdk-dev] [PATCH v2 3/6] ixgbe: Code refactoring Vlad Zolotarov
@ 2015-03-05 11:28 ` Vlad Zolotarov
2015-03-05 11:28 ` [dpdk-dev] [PATCH v2 5/6] common_linuxapp: Added CONFIG_RTE_ETHDEV_LRO_SUPPORT option Vlad Zolotarov
2015-03-05 11:28 ` [dpdk-dev] [PATCH v2 6/6] ixgbe: Add LRO support Vlad Zolotarov
5 siblings, 0 replies; 13+ messages in thread
From: Vlad Zolotarov @ 2015-03-05 11:28 UTC (permalink / raw)
To: dev
- Set the callback in a single function (set_rx_function()) that is called
  from ixgbe_dev_rx_init() for a primary process and from
  eth_ixgbe_dev_init() for secondary processes, instead of in multiple,
  hard-to-track places.
- Bug fix: the rx_pkt_bulk callback was set based on the last queue's
  configuration, while the callback is per-port, so the configuration of all
  queues has to be taken into account. Therefore added
  ixgbe_hw.rx_bulk_alloc_allowed to hold the appropriate state, which is then
  considered in set_rx_function() (see the sketch below).
- Bug fix: the vector scattered packets callback was called regardless of the
  appropriate preconditions.
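In outline, the per-port aggregation introduced here (condensed from the
diff below):

    /* eth_ixgbe_dev_init(): start optimistic. */
    hw->rx_bulk_alloc_allowed = true;

    /* ixgbe_dev_rx_queue_setup(), once per queue:
     * one failing queue disables the feature for the whole port. */
    if (check_rx_burst_bulk_alloc_preconditions(rxq))
        hw->rx_bulk_alloc_allowed = false;

    /* set_rx_function(), once per port, after all queues are set up: */
    if (hw->rx_bulk_alloc_allowed && !dev->data->scattered_rx)
        dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;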
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
---
lib/librte_pmd_ixgbe/ixgbe/ixgbe_type.h | 1 +
lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 14 ++-
lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 158 ++++++++++++++++++++------------
lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 20 +++-
4 files changed, 128 insertions(+), 65 deletions(-)
diff --git a/lib/librte_pmd_ixgbe/ixgbe/ixgbe_type.h b/lib/librte_pmd_ixgbe/ixgbe/ixgbe_type.h
index c67d462..c60081c 100644
--- a/lib/librte_pmd_ixgbe/ixgbe/ixgbe_type.h
+++ b/lib/librte_pmd_ixgbe/ixgbe/ixgbe_type.h
@@ -3657,6 +3657,7 @@ struct ixgbe_hw {
bool force_full_reset;
bool allow_unsupported_sfp;
bool wol_enabled;
+ bool rx_bulk_alloc_allowed;
};
#define ixgbe_call_func(hw, func, params, error) \
diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
index 9bdc046..f93dcfc 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
@@ -760,8 +760,8 @@ eth_ixgbe_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
"Using default TX function.");
}
- if (eth_dev->data->scattered_rx)
- eth_dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
+ set_rx_function(eth_dev);
+
return 0;
}
pci_dev = eth_dev->pci_dev;
@@ -771,6 +771,15 @@ eth_ixgbe_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
hw->vendor_id = pci_dev->id.vendor_id;
hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
hw->allow_unsupported_sfp = 1;
+#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
+ /*
+ * Initialize to TRUE. If any of the Rx queues doesn't meet the bulk
+ * allocation preconditions, we will reset it.
+ */
+ hw->rx_bulk_alloc_allowed = true;
+#else
+ hw->rx_bulk_alloc_allowed = false;
+#endif /* RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC */
/* Initialize the shared code (base driver) */
#ifdef RTE_NIC_BYPASS
@@ -1641,6 +1650,7 @@ ixgbe_dev_stop(struct rte_eth_dev *dev)
/* Clear stored conf */
dev->data->scattered_rx = 0;
+ hw->rx_bulk_alloc_allowed = false;
/* Clear recorded link status */
memset(&link, 0, sizeof(link));
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 4c67a9e..bdac885 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -2096,12 +2096,12 @@ check_rx_burst_bulk_alloc_preconditions(__rte_unused struct igb_rx_queue *rxq)
/* Reset dynamic igb_rx_queue fields back to defaults */
static void
-ixgbe_reset_rx_queue(struct igb_rx_queue *rxq)
+ixgbe_reset_rx_queue(struct ixgbe_hw *hw, struct igb_rx_queue *rxq)
{
static const union ixgbe_adv_rx_desc zeroed_desc = { .read = {
.pkt_addr = 0}};
unsigned i;
- uint16_t len;
+ uint16_t len = rxq->nb_rx_desc;
/*
* By default, the Rx queue setup function allocates enough memory for
@@ -2113,14 +2113,9 @@ ixgbe_reset_rx_queue(struct igb_rx_queue *rxq)
* constraints here to see if we need to zero out memory after the end
* of the H/W descriptor ring.
*/
-#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
- if (check_rx_burst_bulk_alloc_preconditions(rxq) == 0)
+ if (hw->rx_bulk_alloc_allowed)
/* zero out extra memory */
- len = (uint16_t)(rxq->nb_rx_desc + RTE_PMD_IXGBE_RX_MAX_BURST);
- else
-#endif
- /* do not zero out extra memory */
- len = rxq->nb_rx_desc;
+ len += RTE_PMD_IXGBE_RX_MAX_BURST;
/*
* Zero out HW ring memory. Zero out extra memory at the end of
@@ -2162,7 +2157,6 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
const struct rte_memzone *rz;
struct igb_rx_queue *rxq;
struct ixgbe_hw *hw;
- int use_def_burst_func = 1;
uint16_t len;
PMD_INIT_FUNC_TRACE();
@@ -2247,11 +2241,10 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
* S/W ring to make sure look-ahead logic in bulk alloc Rx burst
* function does not access an invalid memory region.
*/
-#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
- len = (uint16_t)(nb_desc + RTE_PMD_IXGBE_RX_MAX_BURST);
-#else
len = nb_desc;
-#endif
+ if (hw->rx_bulk_alloc_allowed)
+ len += RTE_PMD_IXGBE_RX_MAX_BURST;
+
rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
sizeof(struct igb_rx_entry) * len,
RTE_CACHE_LINE_SIZE, socket_id);
@@ -2264,45 +2257,24 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
/*
* Certain constraints must be met in order to use the bulk buffer
- * allocation Rx burst function.
+ * allocation Rx burst function. If any of the Rx queues doesn't meet
+ * them, the feature should be disabled for the whole port.
*/
- use_def_burst_func = check_rx_burst_bulk_alloc_preconditions(rxq);
+ if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
+ PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
+ "preconditions - canceling the feature for "
+ "the whole port[%d]",
+ rxq->queue_id, rxq->port_id);
+ hw->rx_bulk_alloc_allowed = false;
+ }
#ifdef RTE_IXGBE_INC_VECTOR
ixgbe_rxq_vec_setup(rxq);
#endif
- /*
- * TODO: This must be moved to ixgbe_dev_rx_init() since rx_pkt_burst
- * is a global per-device callback thus bulk allocation may be used
- * only if all queues meet the above preconditions.
- */
- /* Check if pre-conditions are satisfied, and no Scattered Rx */
- if (!use_def_burst_func && !dev->data->scattered_rx) {
-#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
- PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
- "satisfied. Rx Burst Bulk Alloc function will be "
- "used on port=%d, queue=%d.",
- rxq->port_id, rxq->queue_id);
- dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
-#ifdef RTE_IXGBE_INC_VECTOR
- if (!ixgbe_rx_vec_condition_check(dev)) {
- PMD_INIT_LOG(INFO, "Vector rx enabled, please make "
- "sure RX burst size no less than 32.");
- dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
- }
-#endif
-#endif
- } else {
- PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions "
- "are not satisfied, Scattered Rx is requested, "
- "or RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC is not "
- "enabled (port=%d, queue=%d).",
- rxq->port_id, rxq->queue_id);
- }
dev->data->rx_queues[queue_idx] = rxq;
- ixgbe_reset_rx_queue(rxq);
+ ixgbe_reset_rx_queue(hw, rxq);
return 0;
}
@@ -2356,6 +2328,7 @@ void
ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
{
unsigned i;
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
PMD_INIT_FUNC_TRACE();
@@ -2371,7 +2344,7 @@ ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
struct igb_rx_queue *rxq = dev->data->rx_queues[i];
if (rxq != NULL) {
ixgbe_rx_queue_release_mbufs(rxq);
- ixgbe_reset_rx_queue(rxq);
+ ixgbe_reset_rx_queue(hw, rxq);
}
}
}
@@ -3533,6 +3506,57 @@ ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
return 0;
}
+void set_rx_function(struct rte_eth_dev *dev)
+{
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ bool vec_is_allowed = !ixgbe_rx_vec_condition_check(dev);
+
+ if (!vec_is_allowed)
+ PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
+ "preconditions or RTE_IXGBE_INC_VECTOR is "
+ "not enabled",
+ dev->data->port_id);
+
+ /* Check if bulk alloc is allowed and no Scattered Rx */
+ if (hw->rx_bulk_alloc_allowed && !dev->data->scattered_rx) {
+ PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
+ "satisfied. Rx Burst Bulk Alloc function "
+ "will be used on port=%d.",
+ dev->data->port_id);
+ dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
+
+ if (vec_is_allowed) {
+ PMD_INIT_LOG(INFO, "Vector rx enabled, please make "
+ "sure RX burst size no less "
+ "than 32.");
+ dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
+ }
+ } else {
+ dev->rx_pkt_burst = ixgbe_recv_pkts;
+
+ PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
+ "satisfied, or Scattered Rx is requested, "
+ "or RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC "
+ "is not enabled (port=%d).",
+ dev->data->port_id);
+ }
+
+ if (dev->data->scattered_rx) {
+ if (vec_is_allowed) {
+ PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx Bulk "
+ "callback (port=%d).",
+ dev->data->port_id);
+ dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
+ } else {
+ PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector) "
+ "Scattered Rx Bulk callback "
+ "(port=%d).",
+ dev->data->port_id);
+ dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
+ }
+ }
+}
+
/*
* Initializes Receive Unit.
*/
@@ -3673,24 +3697,13 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
if (!dev->data->scattered_rx)
PMD_INIT_LOG(DEBUG, "forcing scatter mode");
dev->data->scattered_rx = 1;
-#ifdef RTE_IXGBE_INC_VECTOR
- dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
-#else
- dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
-#endif
}
}
- if (rx_conf->enable_scatter) {
- if (!dev->data->scattered_rx)
- PMD_INIT_LOG(DEBUG, "forcing scatter mode");
-#ifdef RTE_IXGBE_INC_VECTOR
- dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
-#else
- dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
-#endif
+ if (rx_conf->enable_scatter)
dev->data->scattered_rx = 1;
- }
+
+ set_rx_function(dev);
/*
* Device configured with multiple RX queues.
@@ -3967,7 +3980,7 @@ ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
rte_delay_us(RTE_IXGBE_WAIT_100_US);
ixgbe_rx_queue_release_mbufs(rxq);
- ixgbe_reset_rx_queue(rxq);
+ ixgbe_reset_rx_queue(hw, rxq);
} else
return -1;
@@ -4331,3 +4344,26 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
}
}
+
+/* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
+#ifndef RTE_IXGBE_INC_VECTOR
+int ixgbe_rx_vec_condition_check(
+ struct rte_eth_dev __rte_unused *dev)
+{
+ return -1;
+}
+
+uint16_t
+ixgbe_recv_pkts_vec(void __rte_unused *rx_queue,
+ struct rte_mbuf __rte_unused **rx_pkts,
+ uint16_t __rte_unused nb_pkts)
+{
+ return 0;
+}
+
+uint16_t ixgbe_recv_scattered_pkts_vec(void __rte_unused *rx_queue,
+ struct rte_mbuf __rte_unused **rx_pkts, uint16_t __rte_unused nb_pkts)
+{
+ return 0;
+}
+#endif
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
index 329007c..18c9154 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
@@ -255,16 +255,32 @@ struct ixgbe_txq_ops {
*/
void set_tx_function(struct rte_eth_dev *dev, struct igb_tx_queue *txq);
-#ifdef RTE_IXGBE_INC_VECTOR
+/**
+ * Sets the rx_pkt_burst callback in the ixgbe rte_eth_dev instance.
+ *
+ * Sets the callback based on the device parameters:
+ * - ixgbe_hw.rx_bulk_alloc_allowed
+ * - rte_eth_dev_data.scattered_rx
+ * - rte_eth_dev_data.lro
+ * - conditions checked in ixgbe_rx_vec_condition_check()
+ *
+ * This means that the parameters above have to be configured prior to
+ * calling this function.
+ *
+ * @dev rte_eth_dev handle
+ */
+void set_rx_function(struct rte_eth_dev *dev);
+
uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
uint16_t ixgbe_recv_scattered_pkts_vec(void *rx_queue,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+int ixgbe_rx_vec_condition_check(struct rte_eth_dev *dev);
+#ifdef RTE_IXGBE_INC_VECTOR
uint16_t ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
int ixgbe_txq_vec_setup(struct igb_tx_queue *txq);
int ixgbe_rxq_vec_setup(struct igb_rx_queue *rxq);
-int ixgbe_rx_vec_condition_check(struct rte_eth_dev *dev);
#endif
#endif
--
2.1.0
^ permalink raw reply [flat|nested] 13+ messages in thread
* [dpdk-dev] [PATCH v2 5/6] common_linuxapp: Added CONFIG_RTE_ETHDEV_LRO_SUPPORT option
2015-03-05 11:27 [dpdk-dev] [PATCH v2 0/6]: Add LRO support to ixgbe PMD Vlad Zolotarov
` (3 preceding siblings ...)
2015-03-05 11:28 ` [dpdk-dev] [PATCH v2 4/6] ixgbe: Unify the rx_pkt_bulk callback initialization Vlad Zolotarov
@ 2015-03-05 11:28 ` Vlad Zolotarov
2015-03-05 13:19 ` Thomas Monjalon
2015-03-05 11:28 ` [dpdk-dev] [PATCH v2 6/6] ixgbe: Add LRO support Vlad Zolotarov
5 siblings, 1 reply; 13+ messages in thread
From: Vlad Zolotarov @ 2015-03-05 11:28 UTC (permalink / raw)
To: dev
Enables LRO support in PMDs.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
---
config/common_linuxapp | 1 +
1 file changed, 1 insertion(+)
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 97f1c9e..5b98595 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -137,6 +137,7 @@ CONFIG_RTE_MAX_ETHPORTS=32
CONFIG_RTE_LIBRTE_IEEE1588=n
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_LRO_SUPPORT=y
#
# Support NIC bypass logic
--
2.1.0
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH v2 5/6] common_linuxapp: Added CONFIG_RTE_ETHDEV_LRO_SUPPORT option
2015-03-05 11:28 ` [dpdk-dev] [PATCH v2 5/6] common_linuxapp: Added CONFIG_RTE_ETHDEV_LRO_SUPPORT option Vlad Zolotarov
@ 2015-03-05 13:19 ` Thomas Monjalon
2015-03-05 13:39 ` Vlad Zolotarov
0 siblings, 1 reply; 13+ messages in thread
From: Thomas Monjalon @ 2015-03-05 13:19 UTC (permalink / raw)
To: Vlad Zolotarov; +Cc: dev
2015-03-05 13:28, Vlad Zolotarov:
> Enables LRO support in PMDs.
>
> Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
> ---
> config/common_linuxapp | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 97f1c9e..5b98595 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -137,6 +137,7 @@ CONFIG_RTE_MAX_ETHPORTS=32
> CONFIG_RTE_LIBRTE_IEEE1588=n
> CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
> CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
> +CONFIG_RTE_ETHDEV_LRO_SUPPORT=y
Sorry I don't really follow this ixgbe discussion but I wonder why you
would add a compile time option for this feature.
What is the benefit of disabling it?
And if really needed, this patch would make more sense merged with the
code under ifdef.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH v2 5/6] common_linuxapp: Added CONFIG_RTE_ETHDEV_LRO_SUPPORT option
2015-03-05 13:19 ` Thomas Monjalon
@ 2015-03-05 13:39 ` Vlad Zolotarov
2015-03-05 14:01 ` Thomas Monjalon
0 siblings, 1 reply; 13+ messages in thread
From: Vlad Zolotarov @ 2015-03-05 13:39 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: dev
On 03/05/15 15:19, Thomas Monjalon wrote:
> 2015-03-05 13:28, Vlad Zolotarov:
>> Enables LRO support in PMDs.
>>
>> Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
>> ---
>> config/common_linuxapp | 1 +
>> 1 file changed, 1 insertion(+)
>>
>> diff --git a/config/common_linuxapp b/config/common_linuxapp
>> index 97f1c9e..5b98595 100644
>> --- a/config/common_linuxapp
>> +++ b/config/common_linuxapp
>> @@ -137,6 +137,7 @@ CONFIG_RTE_MAX_ETHPORTS=32
>> CONFIG_RTE_LIBRTE_IEEE1588=n
>> CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
>> CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
>> +CONFIG_RTE_ETHDEV_LRO_SUPPORT=y
> Sorry I don't really follow this ixgbe discussion but I wonder why you
> would add a compile time option for this feature.
The only reason is to be able to detect that the feature is present in
the DPDK version you are compiling against, because of the API change.
Currently, this can't be done using the DPDK version, thus we may either
do a try-compilation and, if it fails, define some application-level macro
disabling the feature usage, or we may define a macro at the library level
(together with tons of other such macros, like those in the patch snippet
above).
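E.g., on the application side (a sketch, assuming the macro name from this
patch and a port_conf of type rte_eth_conf):

    #ifdef RTE_ETHDEV_LRO_SUPPORT
    /* The rxmode.enable_lro bit exists in this DPDK build. */
    port_conf.rxmode.enable_lro = 1;
    #endif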
> What is the benefit of disabling it?
No benefit whatsoever.
> And if really needed, this patch would make more sense merged with the
> code under ifdef.
I strongly disagree - the amount of #ifdefs in the DPDK source is
absolutely enormous. It makes reading and understanding the code really
hard.
Therefore, I tried to reduce the number of times the already existing
macros have to be queried (see PATCH4). And of course I don't see any
sense of adding new ones more than really needed. And in LRO case - it's
a single time, where the feature is manifested by the HW.
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH v2 5/6] common_linuxapp: Added CONFIG_RTE_ETHDEV_LRO_SUPPORT option
2015-03-05 13:39 ` Vlad Zolotarov
@ 2015-03-05 14:01 ` Thomas Monjalon
2015-03-05 14:18 ` Vlad Zolotarov
0 siblings, 1 reply; 13+ messages in thread
From: Thomas Monjalon @ 2015-03-05 14:01 UTC (permalink / raw)
To: Vlad Zolotarov; +Cc: dev
2015-03-05 15:39, Vlad Zolotarov:
>
> On 03/05/15 15:19, Thomas Monjalon wrote:
> > 2015-03-05 13:28, Vlad Zolotarov:
> >> Enables LRO support in PMDs.
> >>
> >> Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
> >> ---
> >> config/common_linuxapp | 1 +
> >> 1 file changed, 1 insertion(+)
> >>
> >> diff --git a/config/common_linuxapp b/config/common_linuxapp
> >> index 97f1c9e..5b98595 100644
> >> --- a/config/common_linuxapp
> >> +++ b/config/common_linuxapp
> >> @@ -137,6 +137,7 @@ CONFIG_RTE_MAX_ETHPORTS=32
> >> CONFIG_RTE_LIBRTE_IEEE1588=n
> >> CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
> >> CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
> >> +CONFIG_RTE_ETHDEV_LRO_SUPPORT=y
> >
> > Sorry I don't really follow this ixgbe discussion but I wonder why you
> > would add a compile time option for this feature.
>
> The only reason is to be able to detect that the feature is present in
> the DPDK version you are compiling against, because of the API change.
> Currently, this can't be done using the DPDK version, thus we may either
Why can't you use the version? In the development phase?
When released, you'll be able to test >= 2.1.
> do a try-compilation and, if it fails, define some application-level macro
> disabling the feature usage, or we may define a macro at the library level
> (together with tons of other such macros, like those in the patch snippet
> above).
I'd prefer to have rte_eth_dev_info_get() advertise that the feature
is available with the device and driver.
Please let's try to remove config options and #ifdefs.
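For instance (a sketch of that approach; DEV_RX_OFFLOAD_TCP_LRO is the
capability bit this series adds, and port_conf is the application's
rte_eth_conf):

    struct rte_eth_dev_info dev_info;

    rte_eth_dev_info_get(port_id, &dev_info);
    if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
        port_conf.rxmode.enable_lro = 1;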
> > What is the benefit of disabling it?
>
> No benefit whatsoever.
>
> > And if really needed, this patch would make more sense merged with the
> > code under ifdef.
>
> I strongly disagree - the amount of #ifdefs in the DPDK source is
> absolutely enormous. It makes reading and understanding the code really
> hard.
I agree. You misunderstand me.
I was just saying that this patch should be merged.
> Therefore, I tried to reduce the number of times the already existing
> macros have to be queried (see PATCH4). And of course I don't see any
> sense of adding new ones more than really needed. And in LRO case - it's
> a single time, where the feature is manifested by the HW.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH v2 5/6] common_linuxapp: Added CONFIG_RTE_ETHDEV_LRO_SUPPORT option
2015-03-05 14:01 ` Thomas Monjalon
@ 2015-03-05 14:18 ` Vlad Zolotarov
2015-03-05 19:13 ` Thomas Monjalon
0 siblings, 1 reply; 13+ messages in thread
From: Vlad Zolotarov @ 2015-03-05 14:18 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: dev
On 03/05/15 16:01, Thomas Monjalon wrote:
> 2015-03-05 15:39, Vlad Zolotarov:
>> On 03/05/15 15:19, Thomas Monjalon wrote:
>>> 2015-03-05 13:28, Vlad Zolotarov:
>>>> Enables LRO support in PMDs.
>>>>
>>>> Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
>>>> ---
>>>> config/common_linuxapp | 1 +
>>>> 1 file changed, 1 insertion(+)
>>>>
>>>> diff --git a/config/common_linuxapp b/config/common_linuxapp
>>>> index 97f1c9e..5b98595 100644
>>>> --- a/config/common_linuxapp
>>>> +++ b/config/common_linuxapp
>>>> @@ -137,6 +137,7 @@ CONFIG_RTE_MAX_ETHPORTS=32
>>>> CONFIG_RTE_LIBRTE_IEEE1588=n
>>>> CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
>>>> CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
>>>> +CONFIG_RTE_ETHDEV_LRO_SUPPORT=y
>>> Sorry I don't really follow this ixgbe discussion but I wonder why you
>>> would add a compile time option for this feature.
>> The only reason is to be able to detect that the feature is present in
>> the DPDK version you are compiling against, because of the API change.
>> Currently, this can't be done using the DPDK version, thus we may either
> Why can't you use the version? In the development phase?
> When released, you'll be able to test >= 2.1.
Of course! When the version bumps, the #ifdef I've mentioned above may
be replaced with the version check.
>
>> do a try-compilation and, if it fails, define some application-level macro
>> disabling the feature usage, or we may define a macro at the library level
>> (together with tons of other such macros, like those in the patch snippet
>> above).
> I'd prefer a request rte_eth_dev_info_get() to advertise that the feature
> is available with the device and driver.
> Please let's try to remove config options and #ifdefs.
This is exactly what my patch does. But it doesn't end there - there
is a new feature bit added in rte_eth_rxmode (enable_lro), and in order
to compile, the application has to know somehow if this bit is present or
not. How do you propose to do this now?
Of course, I can put such macro in my own tree but then I'll have to
rebase all the time and inform other developers that will have to work
against my tree (and not upstream as it's supposed to be) - to update.
This sounds like a hassle thus I added this macro to resolve this issue
until the version is bumped.
>
>>> What is the benefit of disabling it?
>> No benefit whatsoever.
>>
>>> And if really needed, this patch would make more sense merged with the
>>> code under ifdef.
>> I strongly disagree - the amount of #ifdefs in the DPDK source is
>> absolutely enormous. It makes reading and understanding the code really
>> hard.
> I agree. You misunderstand me.
> I was just saying that this patch should be merged.
>
>> Therefore, I tried to reduce the number of times the already existing
>> macros have to be queried (see PATCH4). And of course I don't see any
>> sense of adding new ones more than really needed. And in LRO case - it's
>> a single time, where the feature is manifested by the HW.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH v2 5/6] common_linuxapp: Added CONFIG_RTE_ETHDEV_LRO_SUPPORT option
2015-03-05 14:18 ` Vlad Zolotarov
@ 2015-03-05 19:13 ` Thomas Monjalon
2015-03-05 20:41 ` Vladislav Zolotarov
0 siblings, 1 reply; 13+ messages in thread
From: Thomas Monjalon @ 2015-03-05 19:13 UTC (permalink / raw)
To: Vlad Zolotarov; +Cc: dev
2015-03-05 16:18, Vlad Zolotarov:
>
> On 03/05/15 16:01, Thomas Monjalon wrote:
> > 2015-03-05 15:39, Vlad Zolotarov:
> >> On 03/05/15 15:19, Thomas Monjalon wrote:
> >>> 2015-03-05 13:28, Vlad Zolotarov:
> >>>> Enables LRO support in PMDs.
> >>>>
> >>>> Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
> >>>> ---
> >>>> config/common_linuxapp | 1 +
> >>>> 1 file changed, 1 insertion(+)
> >>>>
> >>>> diff --git a/config/common_linuxapp b/config/common_linuxapp
> >>>> index 97f1c9e..5b98595 100644
> >>>> --- a/config/common_linuxapp
> >>>> +++ b/config/common_linuxapp
> >>>> @@ -137,6 +137,7 @@ CONFIG_RTE_MAX_ETHPORTS=32
> >>>> CONFIG_RTE_LIBRTE_IEEE1588=n
> >>>> CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
> >>>> CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
> >>>> +CONFIG_RTE_ETHDEV_LRO_SUPPORT=y
> >>> Sorry I don't really follow this ixgbe discussion but I wonder why you
> >>> would add a compile time option for this feature.
> >> The only reason is to be able to detect that the feature is present in
> >> the DPDK version you are compiling against, because of the API change.
> >> Currently, this can't be done using the DPDK version, thus we may either
> > > Why can't you use the version? In the development phase?
> > When released, you'll be able to test >= 2.1.
>
> Of course! When the version bumps, the #ifdef I've mentioned above may
> be replaced with the version check.
>
> >
> >> do a try-compilation and, if it fails, define some application-level macro
> >> disabling the feature usage, or we may define a macro at the library level
> >> (together with tons of other such macros, like those in the patch snippet
> >> above).
> > I'd prefer to have rte_eth_dev_info_get() advertise that the feature
> > is available with the device and driver.
> > Please let's try to remove config options and #ifdefs.
>
> This is exactly what my patch does. But it doesn't end there - there
> is a new feature bit added in rte_eth_rxmode (enable_lro), and in order
> to compile, the application has to know somehow if this bit is present or
> not. How do you propose to do this now?
I think it would be better to define something like RTE_HAS_LRO in rte_ethdev.h.
> Of course, I can put such macro in my own tree but then I'll have to
> rebase all the time and inform other developers that will have to work
> against my tree (and not upstream as it's supposed to be) - to update.
> This sounds like a hassle thus I added this macro to resolve this issue
> until the version is bumped.
>
> >
> >>> What is the benefit of disabling it?
> >> No benefit whatsoever.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH v2 5/6] common_linuxapp: Added CONFIG_RTE_ETHDEV_LRO_SUPPORT option
2015-03-05 19:13 ` Thomas Monjalon
@ 2015-03-05 20:41 ` Vladislav Zolotarov
0 siblings, 0 replies; 13+ messages in thread
From: Vladislav Zolotarov @ 2015-03-05 20:41 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: dev
On Mar 5, 2015 9:14 PM, "Thomas Monjalon" <thomas.monjalon@6wind.com> wrote:
>
> 2015-03-05 16:18, Vlad Zolotarov:
> >
> > On 03/05/15 16:01, Thomas Monjalon wrote:
> > > 2015-03-05 15:39, Vlad Zolotarov:
> > >> On 03/05/15 15:19, Thomas Monjalon wrote:
> > >>> 2015-03-05 13:28, Vlad Zolotarov:
> > >>>> Enables LRO support in PMDs.
> > >>>>
> > >>>> Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
> > >>>> ---
> > >>>> config/common_linuxapp | 1 +
> > >>>> 1 file changed, 1 insertion(+)
> > >>>>
> > >>>> diff --git a/config/common_linuxapp b/config/common_linuxapp
> > >>>> index 97f1c9e..5b98595 100644
> > >>>> --- a/config/common_linuxapp
> > >>>> +++ b/config/common_linuxapp
> > >>>> @@ -137,6 +137,7 @@ CONFIG_RTE_MAX_ETHPORTS=32
> > >>>> CONFIG_RTE_LIBRTE_IEEE1588=n
> > >>>> CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
> > >>>> CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
> > >>>> +CONFIG_RTE_ETHDEV_LRO_SUPPORT=y
> > >>> Sorry I don't really follow this ixgbe discussion but I wonder why you
> > >>> would add a compile time option for this feature.
> > >> The only reason is to be able to detect that the feature is present in
> > >> the DPDK version you are compiling against, because of the API change.
> > >> Currently, this can't be done using the DPDK version, thus we may either
> > > Why can't you use the version? In the development phase?
> > > When released, you'll be able to test >= 2.1.
> >
> > Of course! When the version bumps, the #ifdef I've mentioned above may
> > be replaced with the version check.
> >
> > >
> > >> do a try-compilation and, if it fails, define some application-level
> > >> macro disabling the feature usage, or we may define a macro at the
> > >> library level (together with tons of other such macros, like those in
> > >> the patch snippet above).
> > > I'd prefer to have rte_eth_dev_info_get() advertise that the feature
> > > is available with the device and driver.
> > > Please let's try to remove config options and #ifdefs.
> >
> > This is exactly what my patch does. But it doesn't end there - there
> > is a new feature bit added in rte_eth_rxmode (enable_lro), and in order
> > to compile, the application has to know somehow if this bit is present or
> > not. How do you propose to do this now?
>
> I think it would be better to define something like RTE_HAS_LRO in
> rte_ethdev.h.
OK. So I'll change it in v4.
>
> > Of course, I can put such macro in my own tree but then I'll have to
> > rebase all the time and inform other developers that will have to work
> > against my tree (and not upstream as it's supposed to be) - to update.
> > This sounds like a hassle thus I added this macro to resolve this issue
> > until the version is bumped.
> >
> > >
> > >>> What is the benefit of disabling it?
> > >> No benefit whatsoever.
>
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [dpdk-dev] [PATCH v2 6/6] ixgbe: Add LRO support
2015-03-05 11:27 [dpdk-dev] [PATCH v2 0/6]: Add LRO support to ixgbe PMD Vlad Zolotarov
` (4 preceding siblings ...)
2015-03-05 11:28 ` [dpdk-dev] [PATCH v2 5/6] common_linuxapp: Added CONFIG_RTE_ETHDEV_LRO_SUPPORT option Vlad Zolotarov
@ 2015-03-05 11:28 ` Vlad Zolotarov
5 siblings, 0 replies; 13+ messages in thread
From: Vlad Zolotarov @ 2015-03-05 11:28 UTC (permalink / raw)
To: dev
- Only x540 and 82599 devices support LRO.
- Add the appropriate HW configuration.
- Add RSC-aware rx_pkt_burst() handlers:
  - Implement bulk allocation and non-bulk allocation versions.
- Add LRO-specific fields to rte_eth_rxmode, to rte_eth_dev_data and to
  igb_rx_queue.
- Use the appropriate handler when LRO is requested (see the usage sketch
  below).
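A minimal application-side usage sketch for the feature added here (port_id
is a placeholder; single Rx/Tx queue for brevity):

    struct rte_eth_conf port_conf;

    memset(&port_conf, 0, sizeof(port_conf));
    port_conf.rxmode.enable_lro = 1; /* honored only on 82599/x540 ports */
    if (rte_eth_dev_configure(port_id, 1, 1, &port_conf) != 0)
        rte_exit(EXIT_FAILURE, "cannot configure the port\n");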
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
---
New in v2:
- Removed rte_eth_dev_data.lro_bulk_alloc.
- Fixed a few styling and spelling issues.
---
lib/librte_ether/rte_ethdev.h | 6 +-
lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 8 +
lib/librte_pmd_ixgbe/ixgbe_ethdev.h | 5 +
lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 562 +++++++++++++++++++++++++++++++++++-
lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 6 +
5 files changed, 580 insertions(+), 7 deletions(-)
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 8db3127..9e7160a 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -320,14 +320,15 @@ struct rte_eth_rxmode {
enum rte_eth_rx_mq_mode mq_mode;
uint32_t max_rx_pkt_len; /**< Only used if jumbo_frame enabled. */
uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/
- uint8_t header_split : 1, /**< Header Split enable. */
+ uint16_t header_split : 1, /**< Header Split enable. */
hw_ip_checksum : 1, /**< IP/UDP/TCP checksum offload enable. */
hw_vlan_filter : 1, /**< VLAN filter enable. */
hw_vlan_strip : 1, /**< VLAN strip enable. */
hw_vlan_extend : 1, /**< Extended VLAN enable. */
jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */
hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */
- enable_scatter : 1; /**< Enable scatter packets rx handler */
+ enable_scatter : 1, /**< Enable scatter packets rx handler */
+ enable_lro : 1; /**< Enable LRO */
};
/**
@@ -1515,6 +1516,7 @@ struct rte_eth_dev_data {
uint8_t port_id; /**< Device [external] port identifier. */
uint8_t promiscuous : 1, /**< RX promiscuous mode ON(1) / OFF(0). */
scattered_rx : 1, /**< RX of scattered packets is ON(1) / OFF(0) */
+ lro : 1, /**< RX LRO is ON(1) / OFF(0) */
all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */
dev_started : 1; /**< Device state: STARTED(1) / STOPPED(0). */
};
diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
index f93dcfc..a943e0b 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
@@ -1650,6 +1650,7 @@ ixgbe_dev_stop(struct rte_eth_dev *dev)
/* Clear stored conf */
dev->data->scattered_rx = 0;
+ dev->data->lro = 0;
hw->rx_bulk_alloc_allowed = false;
/* Clear recorded link status */
@@ -2019,6 +2020,13 @@ ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
DEV_RX_OFFLOAD_IPV4_CKSUM |
DEV_RX_OFFLOAD_UDP_CKSUM |
DEV_RX_OFFLOAD_TCP_CKSUM;
+
+#ifdef RTE_ETHDEV_LRO_SUPPORT
+ if (hw->mac.type == ixgbe_mac_82599EB ||
+ hw->mac.type == ixgbe_mac_X540)
+ dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
+#endif
+
dev_info->tx_offload_capa =
DEV_TX_OFFLOAD_VLAN_INSERT |
DEV_TX_OFFLOAD_IPV4_CKSUM |
diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
index a549f5c..e206584 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
@@ -349,6 +349,11 @@ uint16_t ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t ixgbe_recv_scattered_pkts(void *rx_queue,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t ixgbe_recv_pkts_lro(void *rx_queue,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+
uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index bdac885..f5525e5 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -1366,6 +1366,15 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
}
/**
+ * Detect an RSC descriptor.
+ */
+static inline uint32_t ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
+{
+ return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
+ IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
+}
+
+/**
* Initialize the first mbuf of the returned packet:
* - RX port identifier,
* - hardware offload data, if any:
@@ -1410,6 +1419,291 @@ static inline void ixgbe_fill_cluster_head_buf(
}
}
+/**
+ * Bulk receive handler for the LRO case.
+ *
+ * @rx_queue Rx queue handle
+ * @rx_pkts table of received packets
+ * @nb_pkts size of rx_pkts table
+ * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
+ *
+ * Handles the Rx HW ring completions when the RSC feature is configured. Uses an
+ * additional ring of igb_rsc_entry's that will hold the relevant RSC info.
+ *
+ * We use the same logic as in the Linux and FreeBSD ixgbe drivers:
+ * 1) When non-EOP RSC completion arrives:
+ * a) Update the HEAD of the current RSC aggregation cluster with the new
+ * segment's data length.
+ * b) Set the "next" pointer of the current segment to point to the segment
+ * at the NEXTP index.
+ * c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
+ * in the sw_rsc_ring.
+ * 2) When EOP arrives we just update the cluster's total length and offload
+ * flags and deliver the cluster up to the upper layers. In our case - put it
+ * in the rx_pkts table.
+ *
+ * Returns the number of received packets/clusters (according to the "bulk
+ * receive" interface).
+ */
+static inline uint16_t
+_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
+ bool bulk_alloc)
+{
+ struct igb_rx_queue *rxq = rx_queue;
+ volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
+ struct igb_rx_entry *sw_ring = rxq->sw_ring;
+ struct igb_rsc_entry *sw_rsc_ring = rxq->sw_rsc_ring;
+ uint16_t rx_id = rxq->rx_tail;
+ uint16_t nb_rx = 0;
+ uint16_t nb_hold = rxq->nb_rx_hold;
+ uint16_t prev_id = rxq->rx_tail;
+
+ while (nb_rx < nb_pkts) {
+ bool eop;
+ struct igb_rx_entry *rxe;
+ struct igb_rsc_entry *rsc_entry;
+ struct igb_rsc_entry *next_rsc_entry;
+ struct igb_rx_entry *next_rxe;
+ struct rte_mbuf *first_seg;
+ struct rte_mbuf *rxm;
+ struct rte_mbuf *nmb;
+ union ixgbe_adv_rx_desc rxd;
+ uint16_t data_len;
+ uint16_t next_id;
+ volatile union ixgbe_adv_rx_desc *rxdp;
+ uint32_t staterr;
+
+next_desc:
+ /*
+ * The code in this whole file uses the volatile pointer to
+ * ensure the read ordering of the status and the rest of the
+ * descriptor fields (on the compiler level only!!!). This is so
+ * UGLY - why not just use the compiler barrier instead? DPDK
+ * even has the rte_compiler_barrier() for that.
+ *
+ * But most importantly this is just wrong because this doesn't
+ * ensure memory ordering in a general case at all. For
+ * instance, DPDK is supposed to work on Power CPUs where
+ * compiler barrier may just not be enough!
+ *
+ * I tried to write only this function properly to have a
+ * starting point (as a part of an LRO/RSC series) but the
+ * compiler cursed at me when I tried to cast away the
+ * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
+ * keeping it the way it is for now.
+ *
+ * The code in this file is broken in so many other places and
+ * will just not work on a big endian CPU anyway therefore the
+ * lines below will have to be revisited together with the rest
+ * of the ixgbe PMD.
+ *
+ * TODO:
+ * - Get rid of "volatile" crap and let the compiler do its
+ * job.
+ * - Use the proper memory barrier (rte_rmb()) to ensure the
+ * memory ordering below.
+ */
+ rxdp = &rx_ring[rx_id];
+ staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
+
+ if (!(staterr & IXGBE_RXDADV_STAT_DD))
+ break;
+
+ rxd = *rxdp;
+
+ PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
+ "staterr=0x%x data_len=%u",
+ rxq->port_id, rxq->queue_id, rx_id, staterr,
+ rte_le_to_cpu_16(rxd.wb.upper.length));
+
+ if (!bulk_alloc) {
+ nmb = rte_rxmbuf_alloc(rxq->mb_pool);
+ if (nmb == NULL) {
+ PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
+ "port_id=%u queue_id=%u",
+ rxq->port_id, rxq->queue_id);
+
+ rte_eth_devices[rxq->port_id].data->
+ rx_mbuf_alloc_failed++;
+ break;
+ }
+ } else if (nb_hold > rxq->rx_free_thresh) {
+ uint16_t next_rdt = rxq->rx_free_trigger;
+
+ if (!ixgbe_rx_alloc_bufs(rxq, false)) {
+ rte_wmb();
+ IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
+ next_rdt);
+ nb_hold -= rxq->rx_free_thresh;
+ } else {
+ PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
+ "port_id=%u queue_id=%u",
+ rxq->port_id, rxq->queue_id);
+
+ rte_eth_devices[rxq->port_id].data->
+ rx_mbuf_alloc_failed++;
+ break;
+ }
+ }
+
+ nb_hold++;
+ rxe = &sw_ring[rx_id];
+ eop = staterr & IXGBE_RXDADV_STAT_EOP;
+
+ next_id = rx_id + 1;
+ if (next_id == rxq->nb_rx_desc)
+ next_id = 0;
+
+ /* Prefetch next mbuf while processing current one. */
+ rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
+
+ /*
+ * When next RX descriptor is on a cache-line boundary,
+ * prefetch the next 4 RX descriptors and the next 4 pointers
+ * to mbufs.
+ */
+ if ((next_id & 0x3) == 0) {
+ rte_ixgbe_prefetch(&rx_ring[next_id]);
+ rte_ixgbe_prefetch(&sw_ring[next_id]);
+ }
+
+ rxm = rxe->mbuf;
+
+ if (!bulk_alloc) {
+ __le64 dma =
+ rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
+ /*
+ * Update RX descriptor with the physical address of the
+ * new data buffer of the new allocated mbuf.
+ */
+ rxe->mbuf = nmb;
+
+ rxm->data_off = RTE_PKTMBUF_HEADROOM;
+ rxdp->read.hdr_addr = dma;
+ rxdp->read.pkt_addr = dma;
+ }
+ /*
+ * Set data length & data buffer address of mbuf.
+ */
+ data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
+ rxm->data_len = data_len;
+
+ if (!eop) {
+ uint16_t nextp_id;
+ /*
+ * Get next descriptor index:
+ * - For RSC it's in the NEXTP field.
+ * - For a scattered packet - it's just a following
+ * descriptor.
+ */
+ if (ixgbe_rsc_count(&rxd))
+ nextp_id =
+ (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
+ IXGBE_RXDADV_NEXTP_SHIFT;
+ else
+ nextp_id = next_id;
+
+ next_rsc_entry = &sw_rsc_ring[nextp_id];
+ next_rxe = &sw_ring[nextp_id];
+ rte_ixgbe_prefetch(next_rxe);
+ }
+
+ rsc_entry = &sw_rsc_ring[rx_id];
+ first_seg = rsc_entry->fbuf;
+ rsc_entry->fbuf = NULL;
+
+ /*
+ * If this is the first buffer of the received packet,
+ * set the pointer to the first mbuf of the packet and
+ * initialize its context.
+ * Otherwise, update the total length and the number of segments
+ * of the current scattered packet, and update the pointer to
+ * the last mbuf of the current packet.
+ */
+ if (first_seg == NULL) {
+ first_seg = rxm;
+ first_seg->pkt_len = data_len;
+ first_seg->nb_segs = 1;
+ } else {
+ first_seg->pkt_len += data_len;
+ first_seg->nb_segs++;
+ }
+
+ prev_id = rx_id;
+ rx_id = next_id;
+
+ /*
+ * If this is not the last buffer of the received packet, update
+ * the pointer to the first mbuf at the NEXTP entry in the
+ * sw_rsc_ring and continue to parse the RX ring.
+ */
+ if (!eop) {
+ rxm->next = next_rxe->mbuf;
+ next_rsc_entry->fbuf = first_seg;
+ goto next_desc;
+ }
+
+ /*
+ * This is the last buffer of the received packet - return
+ * the current cluster to the user.
+ */
+ rxm->next = NULL;
+
+ /* Initialize the first mbuf of the returned packet */
+ ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq->port_id,
+ staterr);
+
+ /* Prefetch data of first segment, if configured to do so. */
+ rte_packet_prefetch((char *)first_seg->buf_addr +
+ first_seg->data_off);
+
+ /*
+ * Store the mbuf address into the next entry of the array
+ * of returned packets.
+ */
+ rx_pkts[nb_rx++] = first_seg;
+ }
+
+ /*
+ * Record index of the next RX descriptor to probe.
+ */
+ rxq->rx_tail = rx_id;
+
+ /*
+ * If the number of free RX descriptors is greater than the RX free
+ * threshold of the queue, advance the Receive Descriptor Tail (RDT)
+ * register.
+ * Update the RDT with the value of the last processed RX descriptor
+ * minus 1, to guarantee that the RDT register is never equal to the
+ * RDH register, which creates a "full" ring situation from the
+ * hardware point of view...
+ */
+ if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
+ PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
+ "nb_hold=%u nb_rx=%u",
+ rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
+
+ IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
+ nb_hold = 0;
+ }
+
+ rxq->nb_rx_hold = nb_hold;
+ return nb_rx;
+}
+
+uint16_t
+ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ return _recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
+}
+
+uint16_t
+ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ return _recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
+}
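For context, this is how an application reaches the new callbacks; a minimal
sketch, assuming the ethdev API of this series (the rte_eth_rxmode.enable_lro
flag added earlier in the set) and a hypothetical port 0:

    /* Request LRO (RSC) at configure time; set_rx_function() will then
     * select ixgbe_recv_pkts_lro_bulk_alloc() if every queue satisfies
     * the bulk allocation preconditions, or ixgbe_recv_pkts_lro()
     * otherwise. */
    static const struct rte_eth_conf port_conf = {
        .rxmode = {
            .enable_lro = 1,    /* request RSC/LRO */
            .hw_strip_crc = 1,  /* RSC requires HW CRC stripping */
        },
    };

    if (rte_eth_dev_configure(0 /* port */, 1 /* nb_rxq */, 1 /* nb_txq */,
                              &port_conf) != 0)
        rte_exit(EXIT_FAILURE, "port configure failed\n");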
+
uint16_t
ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts)
@@ -2022,6 +2316,7 @@ ixgbe_rx_queue_release(struct igb_rx_queue *rxq)
if (rxq != NULL) {
ixgbe_rx_queue_release_mbufs(rxq);
rte_free(rxq->sw_ring);
+ rte_free(rxq->sw_rsc_ring);
rte_free(rxq);
}
}
@@ -2144,6 +2439,7 @@ ixgbe_reset_rx_queue(struct ixgbe_hw *hw, struct igb_rx_queue *rxq)
rxq->nb_rx_hold = 0;
rxq->pkt_first_seg = NULL;
rxq->pkt_last_seg = NULL;
+ rxq->rsc_en = 0;
}
int
@@ -2158,6 +2454,14 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
struct igb_rx_queue *rxq;
struct ixgbe_hw *hw;
uint16_t len;
+ struct rte_eth_dev_info dev_info = { 0 };
+ struct rte_eth_rxmode *dev_rx_mode = &dev->data->dev_conf.rxmode;
+ bool rsc_requested = false;
+
+ dev->dev_ops->dev_infos_get(dev, &dev_info);
+ if ((dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO) &&
+ dev_rx_mode->enable_lro)
+ rsc_requested = true;
PMD_INIT_FUNC_TRACE();
hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -2248,12 +2552,28 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
sizeof(struct igb_rx_entry) * len,
RTE_CACHE_LINE_SIZE, socket_id);
- if (rxq->sw_ring == NULL) {
+ if (!rxq->sw_ring) {
ixgbe_rx_queue_release(rxq);
return (-ENOMEM);
}
- PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
- rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
+
+ if (rsc_requested) {
+ rxq->sw_rsc_ring =
+ rte_zmalloc_socket("rxq->sw_rsc_ring",
+ sizeof(struct igb_rsc_entry) * len,
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (!rxq->sw_rsc_ring) {
+ ixgbe_rx_queue_release(rxq);
+ return (-ENOMEM);
+ }
+ } else {
+ rxq->sw_rsc_ring = NULL;
+ }
+
+ PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_rsc_ring=%p hw_ring=%p "
+ "dma_addr=0x%"PRIx64,
+ rxq->sw_ring, rxq->sw_rsc_ring, rxq->rx_ring,
+ rxq->rx_ring_phys_addr);
/*
* Certain constraints must be met in order to use the bulk buffer
@@ -3506,6 +3826,84 @@ ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
return 0;
}
+/**
+ * get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
+ *
+ * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
+ * spec rev. 3.0 chapter 8.2.3.8.13.
+ *
+ * @pool Memory pool of the Rx queue
+ */
+static inline uint32_t get_rscctl_maxdesc(struct rte_mempool *pool)
+{
+ struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
+
+ /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
+ uint16_t maxdesc =
+ 65535 / (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
+
+ if (maxdesc >= 16)
+ return IXGBE_RSCCTL_MAXDESC_16;
+ else if (maxdesc >= 8)
+ return IXGBE_RSCCTL_MAXDESC_8;
+ else if (maxdesc >= 4)
+ return IXGBE_RSCCTL_MAXDESC_4;
+ else
+ return IXGBE_RSCCTL_MAXDESC_1;
+}
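As a worked example: with the common pool layout where mbuf_data_room_size is
2048 + RTE_PKTMBUF_HEADROOM (128), the usable buffer is 2048 bytes, so
maxdesc = 65535 / 2048 = 31 and the function returns IXGBE_RSCCTL_MAXDESC_16:
16 descriptors * 2048 bytes = 32768 bytes, safely under the 64 KB minus one
limit.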
+
+/* (Taken from FreeBSD tree)
+** Setup the correct IVAR register for a particular MSIX interrupt
+** (yes this is all very magic and confusing :)
+** - entry is the register array entry
+** - vector is the MSIX vector for this queue
+** - type is RX/TX/MISC
+*/
+static void
+ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
+{
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ u32 ivar, index;
+
+ vector |= IXGBE_IVAR_ALLOC_VAL;
+
+ switch (hw->mac.type) {
+
+ case ixgbe_mac_82598EB:
+ if (type == -1)
+ entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
+ else
+ entry += (type * 64);
+ index = (entry >> 2) & 0x1F;
+ ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
+ ivar &= ~(0xFF << (8 * (entry & 0x3)));
+ ivar |= (vector << (8 * (entry & 0x3)));
+ IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
+ break;
+
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ if (type == -1) { /* MISC IVAR */
+ index = (entry & 1) * 8;
+ ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
+ ivar &= ~(0xFF << index);
+ ivar |= (vector << index);
+ IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
+ } else { /* RX/TX IVARS */
+ index = (16 * (entry & 1)) + (8 * type);
+ ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
+ ivar &= ~(0xFF << index);
+ ivar |= (vector << index);
+ IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
+ }
+
+ break;
+
+ default:
+ break;
+ }
+}
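To make the 82599/x540 branch concrete: mapping RX queue 3 (entry = 3,
type = 0) to MSIX vector 3 gives index = 16 * (3 & 1) + 8 * 0 = 16, so the
code replaces bits 23:16 of IVAR(3 >> 1) = IVAR(1) with
(3 | IXGBE_IVAR_ALLOC_VAL).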
+
void set_rx_function(struct rte_eth_dev *dev)
{
struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -3555,6 +3953,25 @@ void set_rx_function(struct rte_eth_dev *dev)
dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
}
}
+
+ /*
+ * Initialize the appropriate LRO callback.
+ *
+ * If all queues satisfy the bulk allocation preconditions
+ * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
+ * Otherwise use the single-allocation version.
+ */
+ if (dev->data->lro) {
+ if (hw->rx_bulk_alloc_allowed) {
+ PMD_INIT_LOG(INFO, "LRO is requested. Using a bulk "
+ "allocation version");
+ dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
+ } else {
+ PMD_INIT_LOG(INFO, "LRO is requested. Using a single "
+ "allocation version");
+ dev->rx_pkt_burst = ixgbe_recv_pkts_lro;
+ }
+ }
}
/*
@@ -3573,10 +3990,26 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
uint32_t maxfrs;
uint32_t srrctl;
uint32_t rdrxctl;
+ uint32_t rscctl;
+ uint32_t psrtype;
+ uint32_t rfctl;
uint32_t rxcsum;
uint16_t buf_size;
uint16_t i;
struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
+ struct rte_eth_dev_info dev_info = { 0 };
+ bool rsc_capable = false;
+
+ /* Sanity check */
+ dev->dev_ops->dev_infos_get(dev, &dev_info);
+ if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
+ rsc_capable = true;
+
+ if (!rsc_capable && rx_conf->enable_lro) {
+ PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
+ "support it");
+ return -EINVAL;
+ }
PMD_INIT_FUNC_TRACE();
hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -3596,13 +4029,44 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
/*
+ * RFCTL configuration
+ *
+ * Since NFS packet coalescing is not supported, clear RFCTL.NFSW_DIS
+ * and RFCTL.NFSR_DIS when RSC is enabled.
+ */
+ if (rsc_capable) {
+ rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
+ if (rx_conf->enable_lro) {
+ rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
+ IXGBE_RFCTL_NFSR_DIS);
+ } else {
+ rfctl |= IXGBE_RFCTL_RSC_DIS;
+ }
+
+ IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
+ }
+
+ /*
* Configure CRC stripping, if any.
*/
hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
if (rx_conf->hw_strip_crc)
hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
- else
+ else {
hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
+ if (rx_conf->enable_lro) {
+ /*
+ * According to chapter 4.6.7.2.1 of the Spec Rev.
+ * 3.0, RSC configuration requires HW CRC stripping to
+ * be enabled. If the user requested both HW CRC
+ * stripping off and RSC on, return an error.
+ */
+ PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
+ "is disabled");
+ return -EINVAL;
+ }
+ }
/*
* Configure jumbo frame support, if any.
@@ -3654,9 +4118,18 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
* Configure Header Split
*/
if (rx_conf->header_split) {
+ /*
+ * Print a warning if split_hdr_size is less
+ * than 128 bytes when RSC is requested.
+ */
+ if (rx_conf->enable_lro &&
+ rx_conf->split_hdr_size < 128)
+ PMD_INIT_LOG(INFO, "split_hdr_size less than "
+ "128 bytes (%d)!",
+ rx_conf->split_hdr_size);
+
if (hw->mac.type == ixgbe_mac_82599EB) {
/* Must setup the PSRTYPE register */
- uint32_t psrtype;
psrtype = IXGBE_PSRTYPE_TCPHDR |
IXGBE_PSRTYPE_UDPHDR |
IXGBE_PSRTYPE_IPV4HDR |
@@ -3669,7 +4142,20 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
} else
#endif
+ {
srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
+ /*
+ * Following chapter 4.6.7.2.1 of the 82599/x540
+ * Spec, if RSC is enabled the SRRCTL[n].BSIZEHEADER
+ * must be configured even if header split is not
+ * enabled. In the latter case we configure it to 128
+ * bytes, as the spec recommends.
+ */
+ if (rx_conf->enable_lro)
+ srrctl |=
+ ((128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
+ IXGBE_SRRCTL_BSIZEHDR_MASK);
+ }
/* Set if packets are dropped when no descriptors available */
if (rxq->drop_en)
@@ -3686,6 +4172,13 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
RTE_PKTMBUF_HEADROOM);
srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
IXGBE_SRRCTL_BSIZEPKT_MASK);
+
+ /*
+ * TODO: Consider setting the Receive Descriptor Minimum
+ * Threshold Size for the RSC case. This is not an obviously
+ * beneficial option, but it is worth considering...
+ */
+
IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
@@ -3698,11 +4191,57 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
PMD_INIT_LOG(DEBUG, "forcing scatter mode");
dev->data->scattered_rx = 1;
}
+
+ /* RSC per-queue configuration */
+ if (rx_conf->enable_lro) {
+ uint32_t eitr;
+
+ rscctl =
+ IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
+ psrtype =
+ IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
+ eitr = IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
+
+ rscctl |= IXGBE_RSCCTL_RSCEN;
+ rscctl |= get_rscctl_maxdesc(rxq->mb_pool);
+ psrtype |= IXGBE_PSRTYPE_TCPHDR;
+
+ /*
+ * RSC: Set ITR interval corresponding to 2K ints/s.
+ *
+ * Full-sized RSC aggregations on a 10Gb/s link
+ * arrive at a rate of about 20K aggregations/s.
+ *
+ * A 2K ints/s rate will cause only 10% of the
+ * aggregations to be closed by interrupt timer
+ * expiration when streaming at wire speed.
+ *
+ * For a sparse stream this setting yields at most
+ * 500us of latency for a single RSC aggregation.
+ */
+ eitr |= (2000 | IXGBE_EITR_CNT_WDIS);
+
+ IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
+ IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx),
+ psrtype);
+ IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
+
+ /*
+ * RSC requires the queue to be mapped to an
+ * interrupt vector.
+ */
+ ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
+
+ rxq->rsc_en = 1;
+ }
}
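To tie the EITR value to the 2K ints/s figure above: the interval field sits
at bits 15:3 with roughly 2us granularity on 82599/x540, so writing 2000
programs 2000 >> 3 = 250 units, about 500us between interrupts, i.e. roughly
2K ints/s; IXGBE_EITR_CNT_WDIS makes the write leave the internal interval
counter untouched.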
if (rx_conf->enable_scatter)
dev->data->scattered_rx = 1;
+ if (rx_conf->enable_lro)
+ dev->data->lro = 1;
+
set_rx_function(dev);
/*
@@ -3735,6 +4274,19 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
}
+ /* Finalize RSC configuration */
+ if (rx_conf->enable_lro) {
+ /*
+ * Follow the instructions in chapter 4.6.7.2.1 of the Spec Rev. 3.0.
+ */
+ rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+ rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
+ IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
+
+ PMD_INIT_LOG(INFO, "enabling LRO mode");
+ }
+
return 0;
}
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
index 18c9154..09be70e 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
@@ -79,6 +79,10 @@ struct igb_rx_entry {
struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
};
+struct igb_rsc_entry {
+ struct rte_mbuf *fbuf; /**< First segment of the fragmented packet. */
+};
+
/**
* Structure associated with each descriptor of the TX ring of a TX queue.
*/
@@ -105,6 +109,7 @@ struct igb_rx_queue {
volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
+ struct igb_rsc_entry *sw_rsc_ring; /**< address of RSC software ring. */
struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
uint64_t mbuf_initializer; /**< value to init mbufs */
@@ -126,6 +131,7 @@ struct igb_rx_queue {
uint8_t port_id; /**< Device port identifier. */
uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
+ uint8_t rsc_en; /**< If not 0, RSC is enabled. */
uint8_t rx_deferred_start; /**< not in global dev start. */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
--
2.1.0