* [dpdk-dev] [PATCH 0/4] Virtio NEON support for ARM
@ 2016-06-27 11:54 Jerin Jacob
2016-06-27 11:54 ` [dpdk-dev] [PATCH 1/4] virtio: Fix compile time dependency of use_simple_rxtx usage Jerin Jacob
` (5 more replies)
0 siblings, 6 replies; 44+ messages in thread
From: Jerin Jacob @ 2016-06-27 11:54 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie,
yuanhan.liu, Jerin Jacob
This patchset includes,
1) General cleanup on compile time dependency of use_simple_rxtx with RTE_MACHINE_CPUFLAG_SSSE3
2) Added NEON support for optimized Rx handling
This patchset is based on dpdk-next-virtio/master at a1d8bd4911b28e32c35f16ab2ff3e22180d1f1d7
Jerin Jacob (4):
virtio: Fix compile time dependency of use_simple_rxtx usage
virtio: introduce RTE_LIBRTE_VIRTIO_INC_VECTOR
virtio: move SSE based Rx implementation to separate file
virtio: add neon support
MAINTAINERS | 1 +
config/common_base | 1 +
config/defconfig_arm-armv7a-linuxapp-gcc | 1 +
config/defconfig_ppc_64-power8-linuxapp-gcc | 1 +
config/defconfig_tile-tilegx-linuxapp-gcc | 1 +
doc/guides/rel_notes/release_16_07.rst | 2 +
drivers/net/virtio/Makefile | 3 -
drivers/net/virtio/virtio_pci.h | 1 +
drivers/net/virtio/virtio_rxtx.c | 32 ++--
drivers/net/virtio/virtio_rxtx.h | 3 +-
drivers/net/virtio/virtio_rxtx_simple.c | 168 +------------------
drivers/net/virtio/virtio_rxtx_simple_neon.h | 238 +++++++++++++++++++++++++++
drivers/net/virtio/virtio_rxtx_simple_sse.h | 225 +++++++++++++++++++++++++
drivers/net/virtio/virtio_user_ethdev.c | 1 +
14 files changed, 490 insertions(+), 188 deletions(-)
create mode 100644 drivers/net/virtio/virtio_rxtx_simple_neon.h
create mode 100644 drivers/net/virtio/virtio_rxtx_simple_sse.h
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* [dpdk-dev] [PATCH 1/4] virtio: Fix compile time dependency of use_simple_rxtx usage
2016-06-27 11:54 [dpdk-dev] [PATCH 0/4] Virtio NEON support for ARM Jerin Jacob
@ 2016-06-27 11:54 ` Jerin Jacob
2016-06-27 11:54 ` [dpdk-dev] [PATCH 2/4] virtio: introduce RTE_LIBRTE_VIRTIO_INC_VECTOR Jerin Jacob
` (4 subsequent siblings)
5 siblings, 0 replies; 44+ messages in thread
From: Jerin Jacob @ 2016-06-27 11:54 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie,
yuanhan.liu, Jerin Jacob
Removed unnecessary compile time dependency on "use_simple_rxtx".
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/virtio/Makefile | 3 ---
drivers/net/virtio/virtio_pci.h | 1 +
drivers/net/virtio/virtio_rxtx.c | 28 +++++++++-------------------
drivers/net/virtio/virtio_rxtx.h | 3 +--
drivers/net/virtio/virtio_rxtx_simple.c | 8 ++++++--
drivers/net/virtio/virtio_user_ethdev.c | 1 +
6 files changed, 18 insertions(+), 26 deletions(-)
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 43de46c..114d40e 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -50,10 +50,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
-
-ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSSE3,$(CFLAGS)),RTE_MACHINE_CPUFLAG_SSSE3)
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
-endif
ifeq ($(CONFIG_RTE_VIRTIO_USER),y)
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index dd7693f..b8295a7 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -253,6 +253,7 @@ struct virtio_hw {
uint8_t use_msix;
uint8_t started;
uint8_t modern;
+ uint8_t use_simple_rxtx;
uint8_t mac_addr[ETHER_ADDR_LEN];
uint32_t notify_off_multiplier;
uint8_t *isr;
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index a27208e..63b53f7 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -67,10 +67,6 @@
#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
ETH_TXQ_FLAGS_NOOFFLOADS)
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
-static int use_simple_rxtx;
-#endif
-
static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
@@ -333,6 +329,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
*/
uint16_t i;
uint16_t desc_idx;
+ struct virtio_hw *hw = dev->data->dev_private;
PMD_INIT_FUNC_TRACE();
@@ -353,8 +350,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
nbufs = 0;
error = ENOSPC;
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- if (use_simple_rxtx) {
+ if (hw->use_simple_rxtx) {
for (desc_idx = 0; desc_idx < vq->vq_nentries;
desc_idx++) {
vq->vq_ring.avail->ring[desc_idx] = desc_idx;
@@ -362,7 +358,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
VRING_DESC_F_WRITE;
}
}
-#endif
+
memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
desc_idx++) {
@@ -378,12 +374,11 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
/******************************************
* Enqueue allocated buffers *
*******************************************/
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- if (use_simple_rxtx)
+ if (hw->use_simple_rxtx)
error = virtqueue_enqueue_recv_refill_simple(vq, m);
else
-#endif
error = virtqueue_enqueue_recv_refill(vq, m);
+
if (error) {
rte_pktmbuf_free(m);
break;
@@ -404,8 +399,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
struct virtqueue *vq = txvq->vq;
virtio_dev_vring_start(vq);
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- if (use_simple_rxtx) {
+ if (hw->use_simple_rxtx) {
uint16_t mid_idx = vq->vq_nentries >> 1;
for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
@@ -426,7 +420,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
desc_idx++)
vq->vq_ring.avail->ring[desc_idx] = desc_idx;
}
-#endif
+
VIRTQUEUE_DUMP(vq);
}
}
@@ -456,9 +450,7 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
dev->data->rx_queues[queue_idx] = rxvq;
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
virtio_rxq_vec_setup(rxvq);
-#endif
return 0;
}
@@ -494,9 +486,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
{
uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- struct virtio_hw *hw = dev->data->dev_private;
-#endif
struct virtnet_tx *txvq;
struct virtqueue *vq;
uint16_t tx_free_thresh;
@@ -511,13 +500,14 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
}
#ifdef RTE_MACHINE_CPUFLAG_SSSE3
+ struct virtio_hw *hw = dev->data->dev_private;
/* Use simple rx/tx func if single segment and no offloads */
if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
!vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
PMD_INIT_LOG(INFO, "Using simple rx/tx path");
dev->tx_pkt_burst = virtio_xmit_pkts_simple;
dev->rx_pkt_burst = virtio_recv_pkts_vec;
- use_simple_rxtx = 1;
+ hw->use_simple_rxtx = 1;
}
#endif
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 058b56a..28f82d6 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -86,10 +86,9 @@ struct virtnet_ctl {
const struct rte_memzone *mz; /**< mem zone to populate RX ring. */
};
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct rte_mbuf *m);
-#endif
+
#endif /* _VIRTIO_RXTX_H_ */
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index 242ad90..67430da 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -37,8 +37,6 @@
#include <string.h>
#include <errno.h>
-#include <tmmintrin.h>
-
#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memzone.h>
@@ -131,6 +129,10 @@ virtio_rxq_rearm_vec(struct virtnet_rx *rxvq)
vq_update_avail_idx(vq);
}
+#ifdef RTE_MACHINE_CPUFLAG_SSSE3
+
+#include <tmmintrin.h>
+
/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
*
* This routine is for non-mergeable RX, one desc for each guest buffer.
@@ -293,6 +295,8 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
return nb_pkts_received;
}
+#endif
+
#define VIRTIO_TX_FREE_THRESH 32
#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
#define VIRTIO_TX_FREE_NR 32
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 9216182..ce6ce91 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -303,6 +303,7 @@ virtio_user_eth_dev_alloc(const char *name)
hw->vtpci_ops = &virtio_user_ops;
hw->use_msix = 0;
hw->modern = 0;
+ hw->use_simple_rxtx = 0;
hw->virtio_user_dev = dev;
data->dev_private = hw;
data->numa_node = SOCKET_ID_ANY;
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* [dpdk-dev] [PATCH 2/4] virtio: introduce RTE_LIBRTE_VIRTIO_INC_VECTOR
2016-06-27 11:54 [dpdk-dev] [PATCH 0/4] Virtio NEON support for ARM Jerin Jacob
2016-06-27 11:54 ` [dpdk-dev] [PATCH 1/4] virtio: Fix compile time dependency of use_simple_rxtx usage Jerin Jacob
@ 2016-06-27 11:54 ` Jerin Jacob
2016-06-27 14:19 ` Thomas Monjalon
2016-06-27 11:54 ` [dpdk-dev] [PATCH 3/4] virtio: move SSE based Rx implementation to separate file Jerin Jacob
` (3 subsequent siblings)
5 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-06-27 11:54 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie,
yuanhan.liu, Jerin Jacob
like other PMD drivers, introduce RTE_LIBRTE_VIRTIO_INC_VECTOR
for vector based handler selection in virtio
Enabled by default in common config and disabled for non X86
platforms
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
config/common_base | 1 +
config/defconfig_arm-armv7a-linuxapp-gcc | 1 +
config/defconfig_arm64-armv8a-linuxapp-gcc | 1 +
config/defconfig_ppc_64-power8-linuxapp-gcc | 1 +
config/defconfig_tile-tilegx-linuxapp-gcc | 1 +
drivers/net/virtio/virtio_rxtx.c | 2 ++
6 files changed, 7 insertions(+)
diff --git a/config/common_base b/config/common_base
index 3a04fba..f6ce168 100644
--- a/config/common_base
+++ b/config/common_base
@@ -267,6 +267,7 @@ CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DRIVER=n
CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n
+CONFIG_RTE_LIBRTE_VIRTIO_INC_VECTOR=y
#
# Compile burst-oriented VMXNET3 PMD driver
diff --git a/config/defconfig_arm-armv7a-linuxapp-gcc b/config/defconfig_arm-armv7a-linuxapp-gcc
index bde6acd..a249ad5 100644
--- a/config/defconfig_arm-armv7a-linuxapp-gcc
+++ b/config/defconfig_arm-armv7a-linuxapp-gcc
@@ -75,3 +75,4 @@ CONFIG_RTE_LIBRTE_VMXNET3_PMD=n
CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
CONFIG_RTE_LIBRTE_PMD_BNX2X=n
CONFIG_RTE_LIBRTE_QEDE_PMD=n
+CONFIG_RTE_LIBRTE_VIRTIO_INC_VECTOR=n
diff --git a/config/defconfig_arm64-armv8a-linuxapp-gcc b/config/defconfig_arm64-armv8a-linuxapp-gcc
index a786562..95ed30e 100644
--- a/config/defconfig_arm64-armv8a-linuxapp-gcc
+++ b/config/defconfig_arm64-armv8a-linuxapp-gcc
@@ -48,5 +48,6 @@ CONFIG_RTE_IXGBE_INC_VECTOR=n
CONFIG_RTE_LIBRTE_IVSHMEM=n
CONFIG_RTE_LIBRTE_FM10K_PMD=n
CONFIG_RTE_LIBRTE_I40E_PMD=n
+CONFIG_RTE_LIBRTE_VIRTIO_INC_VECTOR=n
CONFIG_RTE_SCHED_VECTOR=n
diff --git a/config/defconfig_ppc_64-power8-linuxapp-gcc b/config/defconfig_ppc_64-power8-linuxapp-gcc
index bef8f49..1eca73a 100644
--- a/config/defconfig_ppc_64-power8-linuxapp-gcc
+++ b/config/defconfig_ppc_64-power8-linuxapp-gcc
@@ -51,6 +51,7 @@ CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=n
CONFIG_RTE_LIBRTE_IXGBE_PMD=n
CONFIG_RTE_LIBRTE_I40E_PMD=n
CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
+CONFIG_RTE_LIBRTE_VIRTIO_INC_VECTOR=n
CONFIG_RTE_LIBRTE_VMXNET3_PMD=n
CONFIG_RTE_LIBRTE_PMD_BOND=n
CONFIG_RTE_LIBRTE_ENIC_PMD=n
diff --git a/config/defconfig_tile-tilegx-linuxapp-gcc b/config/defconfig_tile-tilegx-linuxapp-gcc
index 5a50793..0d6fe1e 100644
--- a/config/defconfig_tile-tilegx-linuxapp-gcc
+++ b/config/defconfig_tile-tilegx-linuxapp-gcc
@@ -59,6 +59,7 @@ CONFIG_RTE_LIBRTE_IXGBE_PMD=n
CONFIG_RTE_LIBRTE_I40E_PMD=n
CONFIG_RTE_LIBRTE_FM10K_PMD=n
CONFIG_RTE_LIBRTE_VIRTIO_PMD=n
+CONFIG_RTE_LIBRTE_VIRTIO_INC_VECTOR=n
CONFIG_RTE_LIBRTE_VMXNET3_PMD=n
CONFIG_RTE_LIBRTE_ENIC_PMD=n
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 63b53f7..e9b42f3 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -499,6 +499,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
return -EINVAL;
}
+#ifdef RTE_LIBRTE_VIRTIO_INC_VECTOR
#ifdef RTE_MACHINE_CPUFLAG_SSSE3
struct virtio_hw *hw = dev->data->dev_private;
/* Use simple rx/tx func if single segment and no offloads */
@@ -510,6 +511,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
hw->use_simple_rxtx = 1;
}
#endif
+#endif
ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
nb_desc, socket_id, (void **)&txvq);
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* [dpdk-dev] [PATCH 3/4] virtio: move SSE based Rx implementation to separate file
2016-06-27 11:54 [dpdk-dev] [PATCH 0/4] Virtio NEON support for ARM Jerin Jacob
2016-06-27 11:54 ` [dpdk-dev] [PATCH 1/4] virtio: Fix compile time dependency of use_simple_rxtx usage Jerin Jacob
2016-06-27 11:54 ` [dpdk-dev] [PATCH 2/4] virtio: introduce RTE_LIBRTE_VIRTIO_INC_VECTOR Jerin Jacob
@ 2016-06-27 11:54 ` Jerin Jacob
2016-06-28 6:17 ` Jianbo Liu
2016-06-27 11:54 ` [dpdk-dev] [PATCH 4/4] virtio: add neon support Jerin Jacob
` (2 subsequent siblings)
5 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-06-27 11:54 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie,
yuanhan.liu, Jerin Jacob
split out SSE instruction based virtio simple rx
implementation to a separate file
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/virtio/virtio_rxtx_simple.c | 166 +-------------------
drivers/net/virtio/virtio_rxtx_simple_sse.h | 225 ++++++++++++++++++++++++++++
2 files changed, 226 insertions(+), 165 deletions(-)
create mode 100644 drivers/net/virtio/virtio_rxtx_simple_sse.h
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index 67430da..ca87605 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -130,171 +130,7 @@ virtio_rxq_rearm_vec(struct virtnet_rx *rxvq)
}
#ifdef RTE_MACHINE_CPUFLAG_SSSE3
-
-#include <tmmintrin.h>
-
-/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
- *
- * This routine is for non-mergeable RX, one desc for each guest buffer.
- * This routine is based on the RX ring layout optimization. Each entry in the
- * avail ring points to the desc with the same index in the desc ring and this
- * will never be changed in the driver.
- *
- * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
- */
-uint16_t
-virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t nb_pkts)
-{
- struct virtnet_rx *rxvq = rx_queue;
- struct virtqueue *vq = rxvq->vq;
- uint16_t nb_used;
- uint16_t desc_idx;
- struct vring_used_elem *rused;
- struct rte_mbuf **sw_ring;
- struct rte_mbuf **sw_ring_end;
- uint16_t nb_pkts_received;
- __m128i shuf_msk1, shuf_msk2, len_adjust;
-
- shuf_msk1 = _mm_set_epi8(
- 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, /* vlan tci */
- 5, 4, /* dat len */
- 0xFF, 0xFF, 5, 4, /* pkt len */
- 0xFF, 0xFF, 0xFF, 0xFF /* packet type */
-
- );
-
- shuf_msk2 = _mm_set_epi8(
- 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, /* vlan tci */
- 13, 12, /* dat len */
- 0xFF, 0xFF, 13, 12, /* pkt len */
- 0xFF, 0xFF, 0xFF, 0xFF /* packet type */
- );
-
- /* Subtract the header length.
- * In which case do we need the header length in used->len ?
- */
- len_adjust = _mm_set_epi16(
- 0, 0,
- 0,
- (uint16_t)-vq->hw->vtnet_hdr_size,
- 0, (uint16_t)-vq->hw->vtnet_hdr_size,
- 0, 0);
-
- if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
- return 0;
-
- nb_used = VIRTQUEUE_NUSED(vq);
-
- rte_compiler_barrier();
-
- if (unlikely(nb_used == 0))
- return 0;
-
- nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
- nb_used = RTE_MIN(nb_used, nb_pkts);
-
- desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
- rused = &vq->vq_ring.used->ring[desc_idx];
- sw_ring = &vq->sw_ring[desc_idx];
- sw_ring_end = &vq->sw_ring[vq->vq_nentries];
-
- _mm_prefetch((const void *)rused, _MM_HINT_T0);
-
- if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
- virtio_rxq_rearm_vec(rxvq);
- if (unlikely(virtqueue_kick_prepare(vq)))
- virtqueue_notify(vq);
- }
-
- for (nb_pkts_received = 0;
- nb_pkts_received < nb_used;) {
- __m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
- __m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
- __m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
-
- mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0));
- desc[0] = _mm_loadu_si128((__m128i *)(rused + 0));
- _mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]);
-
- mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2));
- desc[1] = _mm_loadu_si128((__m128i *)(rused + 2));
- _mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]);
-
- mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4));
- desc[2] = _mm_loadu_si128((__m128i *)(rused + 4));
- _mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]);
-
- mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6));
- desc[3] = _mm_loadu_si128((__m128i *)(rused + 6));
- _mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]);
-
- pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2);
- pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1);
- pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust);
- pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust);
- _mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1,
- pkt_mb[1]);
- _mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1,
- pkt_mb[0]);
-
- pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2);
- pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1);
- pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust);
- pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust);
- _mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1,
- pkt_mb[3]);
- _mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1,
- pkt_mb[2]);
-
- pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2);
- pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1);
- pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust);
- pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust);
- _mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1,
- pkt_mb[5]);
- _mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1,
- pkt_mb[4]);
-
- pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2);
- pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1);
- pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust);
- pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust);
- _mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1,
- pkt_mb[7]);
- _mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1,
- pkt_mb[6]);
-
- if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
- if (sw_ring + nb_used <= sw_ring_end)
- nb_pkts_received += nb_used;
- else
- nb_pkts_received += sw_ring_end - sw_ring;
- break;
- } else {
- if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
- sw_ring_end)) {
- nb_pkts_received += sw_ring_end - sw_ring;
- break;
- } else {
- nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
-
- rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
- sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
- rused += RTE_VIRTIO_DESC_PER_LOOP;
- nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
- }
- }
- }
-
- vq->vq_used_cons_idx += nb_pkts_received;
- vq->vq_free_cnt += nb_pkts_received;
- rxvq->stats.packets += nb_pkts_received;
- return nb_pkts_received;
-}
-
+#include "virtio_rxtx_simple_sse.h"
#endif
#define VIRTIO_TX_FREE_THRESH 32
diff --git a/drivers/net/virtio/virtio_rxtx_simple_sse.h b/drivers/net/virtio/virtio_rxtx_simple_sse.h
new file mode 100644
index 0000000..4e8728f
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple_sse.h
@@ -0,0 +1,225 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <tmmintrin.h>
+
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_cycles.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_prefetch.h>
+#include <rte_string_fns.h>
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+#include "virtio_rxtx.h"
+
+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
+/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
+ *
+ * This routine is for non-mergeable RX, one desc for each guest buffer.
+ * This routine is based on the RX ring layout optimization. Each entry in the
+ * avail ring points to the desc with the same index in the desc ring and this
+ * will never be changed in the driver.
+ *
+ * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
+ */
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct virtnet_rx *rxvq = rx_queue;
+ struct virtqueue *vq = rxvq->vq;
+ uint16_t nb_used;
+ uint16_t desc_idx;
+ struct vring_used_elem *rused;
+ struct rte_mbuf **sw_ring;
+ struct rte_mbuf **sw_ring_end;
+ uint16_t nb_pkts_received;
+ __m128i shuf_msk1, shuf_msk2, len_adjust;
+
+ shuf_msk1 = _mm_set_epi8(
+ 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, /* vlan tci */
+ 5, 4, /* dat len */
+ 0xFF, 0xFF, 5, 4, /* pkt len */
+ 0xFF, 0xFF, 0xFF, 0xFF /* packet type */
+
+ );
+
+ shuf_msk2 = _mm_set_epi8(
+ 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, /* vlan tci */
+ 13, 12, /* dat len */
+ 0xFF, 0xFF, 13, 12, /* pkt len */
+ 0xFF, 0xFF, 0xFF, 0xFF /* packet type */
+ );
+
+ /* Subtract the header length.
+ * In which case do we need the header length in used->len ?
+ */
+ len_adjust = _mm_set_epi16(
+ 0, 0,
+ 0,
+ (uint16_t)-vq->hw->vtnet_hdr_size,
+ 0, (uint16_t)-vq->hw->vtnet_hdr_size,
+ 0, 0);
+
+ if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
+ return 0;
+
+ nb_used = VIRTQUEUE_NUSED(vq);
+
+ rte_compiler_barrier();
+
+ if (unlikely(nb_used == 0))
+ return 0;
+
+ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
+ nb_used = RTE_MIN(nb_used, nb_pkts);
+
+ desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+ rused = &vq->vq_ring.used->ring[desc_idx];
+ sw_ring = &vq->sw_ring[desc_idx];
+ sw_ring_end = &vq->sw_ring[vq->vq_nentries];
+
+ _mm_prefetch((const void *)rused, _MM_HINT_T0);
+
+ if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
+ virtio_rxq_rearm_vec(rxvq);
+ if (unlikely(virtqueue_kick_prepare(vq)))
+ virtqueue_notify(vq);
+ }
+
+ for (nb_pkts_received = 0;
+ nb_pkts_received < nb_used;) {
+ __m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
+ __m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
+ __m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
+
+ mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0));
+ desc[0] = _mm_loadu_si128((__m128i *)(rused + 0));
+ _mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]);
+
+ mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2));
+ desc[1] = _mm_loadu_si128((__m128i *)(rused + 2));
+ _mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]);
+
+ mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4));
+ desc[2] = _mm_loadu_si128((__m128i *)(rused + 4));
+ _mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]);
+
+ mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6));
+ desc[3] = _mm_loadu_si128((__m128i *)(rused + 6));
+ _mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]);
+
+ pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2);
+ pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1);
+ pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust);
+ pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust);
+ _mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1,
+ pkt_mb[1]);
+ _mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1,
+ pkt_mb[0]);
+
+ pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2);
+ pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1);
+ pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust);
+ pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust);
+ _mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1,
+ pkt_mb[3]);
+ _mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1,
+ pkt_mb[2]);
+
+ pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2);
+ pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1);
+ pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust);
+ pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust);
+ _mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1,
+ pkt_mb[5]);
+ _mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1,
+ pkt_mb[4]);
+
+ pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2);
+ pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1);
+ pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust);
+ pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust);
+ _mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1,
+ pkt_mb[7]);
+ _mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1,
+ pkt_mb[6]);
+
+ if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
+ if (sw_ring + nb_used <= sw_ring_end)
+ nb_pkts_received += nb_used;
+ else
+ nb_pkts_received += sw_ring_end - sw_ring;
+ break;
+ } else {
+ if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
+ sw_ring_end)) {
+ nb_pkts_received += sw_ring_end - sw_ring;
+ break;
+ } else {
+ nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
+
+ rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
+ sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
+ rused += RTE_VIRTIO_DESC_PER_LOOP;
+ nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
+ }
+ }
+ }
+
+ vq->vq_used_cons_idx += nb_pkts_received;
+ vq->vq_free_cnt += nb_pkts_received;
+ rxvq->stats.packets += nb_pkts_received;
+ return nb_pkts_received;
+}
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* [dpdk-dev] [PATCH 4/4] virtio: add neon support
2016-06-27 11:54 [dpdk-dev] [PATCH 0/4] Virtio NEON support for ARM Jerin Jacob
` (2 preceding siblings ...)
2016-06-27 11:54 ` [dpdk-dev] [PATCH 3/4] virtio: move SSE based Rx implementation to separate file Jerin Jacob
@ 2016-06-27 11:54 ` Jerin Jacob
2016-07-01 11:16 ` [dpdk-dev] From: Jerin Jacob <jerin.jacob@caviumnetworks.com> Jerin Jacob
2016-07-01 11:19 ` [dpdk-dev] [PATCH v2 0/3] " Jerin Jacob
5 siblings, 0 replies; 44+ messages in thread
From: Jerin Jacob @ 2016-06-27 11:54 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie,
yuanhan.liu, Jerin Jacob
Added neon based Rx vector implementation for virtio.
Selected neon based virtio implementation for ARM64 as
default and updated the MAINTAINERS file.
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
MAINTAINERS | 1 +
config/defconfig_arm64-armv8a-linuxapp-gcc | 1 -
doc/guides/rel_notes/release_16_07.rst | 2 +
drivers/net/virtio/virtio_rxtx.c | 2 +-
drivers/net/virtio/virtio_rxtx_simple.c | 2 +
drivers/net/virtio/virtio_rxtx_simple_neon.h | 238 +++++++++++++++++++++++++++
6 files changed, 244 insertions(+), 2 deletions(-)
create mode 100644 drivers/net/virtio/virtio_rxtx_simple_neon.h
diff --git a/MAINTAINERS b/MAINTAINERS
index f6c0d3d..2bb12aa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -142,6 +142,7 @@ F: lib/librte_eal/common/include/arch/arm/*_64.h
F: lib/librte_acl/acl_run_neon.*
F: lib/librte_lpm/rte_lpm_neon.h
F: lib/librte_hash/rte*_arm64.h
+F: drivers/net/virtio/virtio_rxtx_simple_neon.h
EZchip TILE-Gx
M: Zhigang Lu <zlu@ezchip.com>
diff --git a/config/defconfig_arm64-armv8a-linuxapp-gcc b/config/defconfig_arm64-armv8a-linuxapp-gcc
index 95ed30e..a786562 100644
--- a/config/defconfig_arm64-armv8a-linuxapp-gcc
+++ b/config/defconfig_arm64-armv8a-linuxapp-gcc
@@ -48,6 +48,5 @@ CONFIG_RTE_IXGBE_INC_VECTOR=n
CONFIG_RTE_LIBRTE_IVSHMEM=n
CONFIG_RTE_LIBRTE_FM10K_PMD=n
CONFIG_RTE_LIBRTE_I40E_PMD=n
-CONFIG_RTE_LIBRTE_VIRTIO_INC_VECTOR=n
CONFIG_RTE_SCHED_VECTOR=n
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 2694f50..3187a33 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -118,6 +118,8 @@ New Features
* Root privilege is a must for sorting hugepages by physical address.
* Can only be used with vhost user backend.
+* **Virtio NEON support for ARM.**
+
Resolved Issues
---------------
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index e9b42f3..ca25db3 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -500,7 +500,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
}
#ifdef RTE_LIBRTE_VIRTIO_INC_VECTOR
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
+#if defined RTE_MACHINE_CPUFLAG_SSSE3 || defined RTE_MACHINE_CPUFLAG_NEON
struct virtio_hw *hw = dev->data->dev_private;
/* Use simple rx/tx func if single segment and no offloads */
if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index ca87605..e5dc010 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -131,6 +131,8 @@ virtio_rxq_rearm_vec(struct virtnet_rx *rxvq)
#ifdef RTE_MACHINE_CPUFLAG_SSSE3
#include "virtio_rxtx_simple_sse.h"
+#elif RTE_MACHINE_CPUFLAG_NEON
+#include "virtio_rxtx_simple_neon.h"
#endif
#define VIRTIO_TX_FREE_THRESH 32
diff --git a/drivers/net/virtio/virtio_rxtx_simple_neon.h b/drivers/net/virtio/virtio_rxtx_simple_neon.h
new file mode 100644
index 0000000..41f347d
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple_neon.h
@@ -0,0 +1,238 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium networks Ltd. 2016
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_cycles.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_prefetch.h>
+#include <rte_string_fns.h>
+#include <rte_vect.h>
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+#include "virtio_rxtx.h"
+
+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
+/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
+ *
+ * This routine is for non-mergeable RX, one desc for each guest buffer.
+ * This routine is based on the RX ring layout optimization. Each entry in the
+ * avail ring points to the desc with the same index in the desc ring and this
+ * will never be changed in the driver.
+ *
+ * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
+ */
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct virtnet_rx *rxvq = rx_queue;
+ struct virtqueue *vq = rxvq->vq;
+ uint16_t nb_used;
+ uint16_t desc_idx;
+ struct vring_used_elem *rused;
+ struct rte_mbuf **sw_ring;
+ struct rte_mbuf **sw_ring_end;
+ uint16_t nb_pkts_received;
+
+ uint8x16_t shuf_msk1 = {
+ 0xFF, 0xFF, 0xFF, 0xFF, /* packet type */
+ 4, 5, 0xFF, 0xFF, /* pkt len */
+ 4, 5, /* dat len */
+ 0xFF, 0xFF, /* vlan tci */
+ 0xFF, 0xFF, 0xFF, 0xFF
+ };
+
+ uint8x16_t shuf_msk2 = {
+ 0xFF, 0xFF, 0xFF, 0xFF, /* packet type */
+ 12, 13, 0xFF, 0xFF, /* pkt len */
+ 12, 13, /* dat len */
+ 0xFF, 0xFF, /* vlan tci */
+ 0xFF, 0xFF, 0xFF, 0xFF
+ };
+
+ /* Subtract the header length.
+ * In which case do we need the header length in used->len ?
+ */
+ uint16x8_t len_adjust = {
+ 0, 0,
+ (uint16_t)vq->hw->vtnet_hdr_size, 0,
+ (uint16_t)vq->hw->vtnet_hdr_size,
+ 0,
+ 0, 0
+ };
+
+ if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
+ return 0;
+
+ nb_used = VIRTQUEUE_NUSED(vq);
+
+ rte_rmb();
+
+ if (unlikely(nb_used == 0))
+ return 0;
+
+ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
+ nb_used = RTE_MIN(nb_used, nb_pkts);
+
+ desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+ rused = &vq->vq_ring.used->ring[desc_idx];
+ sw_ring = &vq->sw_ring[desc_idx];
+ sw_ring_end = &vq->sw_ring[vq->vq_nentries];
+
+ rte_prefetch_non_temporal(rused);
+
+ if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
+ virtio_rxq_rearm_vec(rxvq);
+ if (unlikely(virtqueue_kick_prepare(vq)))
+ virtqueue_notify(vq);
+ }
+
+ for (nb_pkts_received = 0;
+ nb_pkts_received < nb_used;) {
+ uint64x2_t desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
+ uint64x2_t mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
+ uint64x2_t pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
+
+ mbp[0] = vld1q_u64((uint64_t *)(sw_ring + 0));
+ desc[0] = vld1q_u64((uint64_t *)(rused + 0));
+ vst1q_u64((uint64_t *)&rx_pkts[0], mbp[0]);
+
+ mbp[1] = vld1q_u64((uint64_t *)(sw_ring + 2));
+ desc[1] = vld1q_u64((uint64_t *)(rused + 2));
+ vst1q_u64((uint64_t *)&rx_pkts[2], mbp[1]);
+
+ mbp[2] = vld1q_u64((uint64_t *)(sw_ring + 4));
+ desc[2] = vld1q_u64((uint64_t *)(rused + 4));
+ vst1q_u64((uint64_t *)&rx_pkts[4], mbp[2]);
+
+ mbp[3] = vld1q_u64((uint64_t *)(sw_ring + 6));
+ desc[3] = vld1q_u64((uint64_t *)(rused + 6));
+ vst1q_u64((uint64_t *)&rx_pkts[6], mbp[3]);
+
+ pkt_mb[1] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[0]), shuf_msk2));
+ pkt_mb[0] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[0]), shuf_msk1));
+ pkt_mb[1] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[1]), len_adjust));
+ pkt_mb[0] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[0]), len_adjust));
+ vst1q_u64((void *)&rx_pkts[1]->rx_descriptor_fields1,
+ pkt_mb[1]);
+ vst1q_u64((void *)&rx_pkts[0]->rx_descriptor_fields1,
+ pkt_mb[0]);
+
+ pkt_mb[3] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[1]), shuf_msk2));
+ pkt_mb[2] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[1]), shuf_msk1));
+ pkt_mb[3] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[3]), len_adjust));
+ pkt_mb[2] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[2]), len_adjust));
+ vst1q_u64((void *)&rx_pkts[3]->rx_descriptor_fields1,
+ pkt_mb[3]);
+ vst1q_u64((void *)&rx_pkts[2]->rx_descriptor_fields1,
+ pkt_mb[2]);
+
+ pkt_mb[5] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[2]), shuf_msk2));
+ pkt_mb[4] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[2]), shuf_msk1));
+ pkt_mb[5] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[5]), len_adjust));
+ pkt_mb[4] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[4]), len_adjust));
+ vst1q_u64((void *)&rx_pkts[5]->rx_descriptor_fields1,
+ pkt_mb[5]);
+ vst1q_u64((void *)&rx_pkts[4]->rx_descriptor_fields1,
+ pkt_mb[4]);
+
+ pkt_mb[7] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[3]), shuf_msk2));
+ pkt_mb[6] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[3]), shuf_msk1));
+ pkt_mb[7] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[7]), len_adjust));
+ pkt_mb[6] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[6]), len_adjust));
+ vst1q_u64((void *)&rx_pkts[7]->rx_descriptor_fields1,
+ pkt_mb[7]);
+ vst1q_u64((void *)&rx_pkts[6]->rx_descriptor_fields1,
+ pkt_mb[6]);
+
+ if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
+ if (sw_ring + nb_used <= sw_ring_end)
+ nb_pkts_received += nb_used;
+ else
+ nb_pkts_received += sw_ring_end - sw_ring;
+ break;
+ } else {
+ if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
+ sw_ring_end)) {
+ nb_pkts_received += sw_ring_end - sw_ring;
+ break;
+ } else {
+ nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
+
+ rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
+ sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
+ rused += RTE_VIRTIO_DESC_PER_LOOP;
+ nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
+ }
+ }
+ }
+
+ vq->vq_used_cons_idx += nb_pkts_received;
+ vq->vq_free_cnt += nb_pkts_received;
+ rxvq->stats.packets += nb_pkts_received;
+ return nb_pkts_received;
+}
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH 2/4] virtio: introduce RTE_LIBRTE_VIRTIO_INC_VECTOR
2016-06-27 11:54 ` [dpdk-dev] [PATCH 2/4] virtio: introduce RTE_LIBRTE_VIRTIO_INC_VECTOR Jerin Jacob
@ 2016-06-27 14:19 ` Thomas Monjalon
2016-06-27 14:48 ` Jerin Jacob
0 siblings, 1 reply; 44+ messages in thread
From: Thomas Monjalon @ 2016-06-27 14:19 UTC (permalink / raw)
To: Jerin Jacob; +Cc: dev, bruce.richardson, jianbo.liu, huawei.xie, yuanhan.liu
2016-06-27 17:24, Jerin Jacob:
> --- a/config/common_base
> +++ b/config/common_base
> @@ -267,6 +267,7 @@ CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
> CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
> CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DRIVER=n
> CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n
> +CONFIG_RTE_LIBRTE_VIRTIO_INC_VECTOR=y
I don't remember what means INC_VECTOR?
Why a config option is needed for vector implementations?
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH 2/4] virtio: introduce RTE_LIBRTE_VIRTIO_INC_VECTOR
2016-06-27 14:19 ` Thomas Monjalon
@ 2016-06-27 14:48 ` Jerin Jacob
2016-06-27 14:59 ` Thomas Monjalon
0 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-06-27 14:48 UTC (permalink / raw)
To: Thomas Monjalon
Cc: dev, bruce.richardson, jianbo.liu, huawei.xie, yuanhan.liu
On Mon, Jun 27, 2016 at 04:19:57PM +0200, Thomas Monjalon wrote:
> 2016-06-27 17:24, Jerin Jacob:
> > --- a/config/common_base
> > +++ b/config/common_base
> > @@ -267,6 +267,7 @@ CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
> > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
> > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DRIVER=n
> > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n
> > +CONFIG_RTE_LIBRTE_VIRTIO_INC_VECTOR=y
>
> I don't remember what means INC_VECTOR?
> Why a config option is needed for vector implementations?
I thought of adding additional configuration option(INC_VECTOR) _apart_ from
cpu flag based scheme in the patch because even though if a given platform
has cpu instruction support, in some platforms scalar version may
perform well wrt vector version(based on instruction latency, emulation required or not
etc). So a top level flag INC_VECTOR, can override the vector selection
for a given platform if required.
Regarding INC_VECTOR(INC in vector configuration name, I have no idea, I
followed the existing flags)
$ grep "INC_VECTOR" config/common_base
CONFIG_RTE_IXGBE_INC_VECTOR=y
CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y
Jerin
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH 2/4] virtio: introduce RTE_LIBRTE_VIRTIO_INC_VECTOR
2016-06-27 14:48 ` Jerin Jacob
@ 2016-06-27 14:59 ` Thomas Monjalon
2016-06-29 11:18 ` Jerin Jacob
0 siblings, 1 reply; 44+ messages in thread
From: Thomas Monjalon @ 2016-06-27 14:59 UTC (permalink / raw)
To: Jerin Jacob; +Cc: dev, bruce.richardson, jianbo.liu, huawei.xie, yuanhan.liu
2016-06-27 20:18, Jerin Jacob:
> On Mon, Jun 27, 2016 at 04:19:57PM +0200, Thomas Monjalon wrote:
> > 2016-06-27 17:24, Jerin Jacob:
> > > --- a/config/common_base
> > > +++ b/config/common_base
> > > @@ -267,6 +267,7 @@ CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
> > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
> > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DRIVER=n
> > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n
> > > +CONFIG_RTE_LIBRTE_VIRTIO_INC_VECTOR=y
> >
> > I don't remember what means INC_VECTOR?
> > Why a config option is needed for vector implementations?
>
> I thought of adding additional configuration option(INC_VECTOR) _apart_ from
> cpu flag based scheme in the patch because even though if a given platform
> has cpu instruction support, in some platforms scalar version may
> perform well wrt vector version(based on instruction latency, emulation required or not
> etc). So a top level flag INC_VECTOR, can override the vector selection
> for a given platform if required.
Isn't it a runtime driver option needed to disable vector virtio?
> Regarding INC_VECTOR(INC in vector configuration name, I have no idea, I
> followed the existing flags)
> $ grep "INC_VECTOR" config/common_base
> CONFIG_RTE_IXGBE_INC_VECTOR=y
> CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
> CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y
If the flag is really needed I would suggest VIRTIO_VECTOR.
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH 3/4] virtio: move SSE based Rx implementation to separate file
2016-06-27 11:54 ` [dpdk-dev] [PATCH 3/4] virtio: move SSE based Rx implementation to separate file Jerin Jacob
@ 2016-06-28 6:17 ` Jianbo Liu
2016-06-29 11:27 ` Jerin Jacob
0 siblings, 1 reply; 44+ messages in thread
From: Jianbo Liu @ 2016-06-28 6:17 UTC (permalink / raw)
To: Jerin Jacob
Cc: dev, Thomas Monjalon, Bruce Richardson, huawei.xie, yuanhan.liu
On 27 June 2016 at 19:54, Jerin Jacob <jerin.jacob@caviumnetworks.com> wrote:
> split out SSE instruction based virtio simple rx
> implementation to a separate file
>
> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> ---
> drivers/net/virtio/virtio_rxtx_simple.c | 166 +-------------------
> drivers/net/virtio/virtio_rxtx_simple_sse.h | 225 ++++++++++++++++++++++++++++
> 2 files changed, 226 insertions(+), 165 deletions(-)
> create mode 100644 drivers/net/virtio/virtio_rxtx_simple_sse.h
>
I think it's better to move sse implementation to a C file,
as Bruce pointed out at
http://www.dpdk.org/ml/archives/dev/2016-April/037937.html
Jianbo
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH 2/4] virtio: introduce RTE_LIBRTE_VIRTIO_INC_VECTOR
2016-06-27 14:59 ` Thomas Monjalon
@ 2016-06-29 11:18 ` Jerin Jacob
2016-06-29 11:25 ` Thomas Monjalon
0 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-06-29 11:18 UTC (permalink / raw)
To: Thomas Monjalon
Cc: dev, bruce.richardson, jianbo.liu, huawei.xie, yuanhan.liu
On Mon, Jun 27, 2016 at 04:59:42PM +0200, Thomas Monjalon wrote:
> 2016-06-27 20:18, Jerin Jacob:
> > On Mon, Jun 27, 2016 at 04:19:57PM +0200, Thomas Monjalon wrote:
> > > 2016-06-27 17:24, Jerin Jacob:
> > > > --- a/config/common_base
> > > > +++ b/config/common_base
> > > > @@ -267,6 +267,7 @@ CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
> > > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
> > > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DRIVER=n
> > > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n
> > > > +CONFIG_RTE_LIBRTE_VIRTIO_INC_VECTOR=y
> > >
> > > I don't remember what means INC_VECTOR?
> > > Why a config option is needed for vector implementations?
> >
> > I thought of adding additional configuration option(INC_VECTOR) _apart_ from
> > cpu flag based scheme in the patch because even though if a given platform
> > has cpu instruction support, in some platforms scalar version may
> > perform well wrt vector version(based on instruction latency, emulation required or not
> > etc). So a top level flag INC_VECTOR, can override the vector selection
> > for a given platform if required.
>
> Isn't it a runtime driver option needed to disable vector virtio?
>
> > Regarding INC_VECTOR(INC in vector configuration name, I have no idea, I
> > followed the existing flags)
> > $ grep "INC_VECTOR" config/common_base
> > CONFIG_RTE_IXGBE_INC_VECTOR=y
> > CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
> > CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y
>
> If the flag is really needed I would suggest VIRTIO_VECTOR.
OK I will change to VIRTIO_VECTOR
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH 2/4] virtio: introduce RTE_LIBRTE_VIRTIO_INC_VECTOR
2016-06-29 11:18 ` Jerin Jacob
@ 2016-06-29 11:25 ` Thomas Monjalon
2016-06-29 11:40 ` Jerin Jacob
0 siblings, 1 reply; 44+ messages in thread
From: Thomas Monjalon @ 2016-06-29 11:25 UTC (permalink / raw)
To: Jerin Jacob; +Cc: dev, bruce.richardson, jianbo.liu, huawei.xie, yuanhan.liu
2016-06-29 16:48, Jerin Jacob:
> On Mon, Jun 27, 2016 at 04:59:42PM +0200, Thomas Monjalon wrote:
> > 2016-06-27 20:18, Jerin Jacob:
> > > On Mon, Jun 27, 2016 at 04:19:57PM +0200, Thomas Monjalon wrote:
> > > > 2016-06-27 17:24, Jerin Jacob:
> > > > > --- a/config/common_base
> > > > > +++ b/config/common_base
> > > > > @@ -267,6 +267,7 @@ CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
> > > > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
> > > > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DRIVER=n
> > > > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n
> > > > > +CONFIG_RTE_LIBRTE_VIRTIO_INC_VECTOR=y
> > > >
> > > > I don't remember what means INC_VECTOR?
> > > > Why a config option is needed for vector implementations?
> > >
> > > I thought of adding additional configuration option(INC_VECTOR) _apart_ from
> > > cpu flag based scheme in the patch because even though if a given platform
> > > has cpu instruction support, in some platforms scalar version may
> > > perform well wrt vector version(based on instruction latency, emulation required or not
> > > etc). So a top level flag INC_VECTOR, can override the vector selection
> > > for a given platform if required.
> >
> > Isn't it a runtime driver option needed to disable vector virtio?
> >
> > > Regarding INC_VECTOR(INC in vector configuration name, I have no idea, I
> > > followed the existing flags)
> > > $ grep "INC_VECTOR" config/common_base
> > > CONFIG_RTE_IXGBE_INC_VECTOR=y
> > > CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
> > > CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y
> >
> > If the flag is really needed I would suggest VIRTIO_VECTOR.
>
> OK I will change to VIRTIO_VECTOR
I would prefer a runtime option.
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH 3/4] virtio: move SSE based Rx implementation to separate file
2016-06-28 6:17 ` Jianbo Liu
@ 2016-06-29 11:27 ` Jerin Jacob
2016-06-30 5:43 ` Yuanhan Liu
0 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-06-29 11:27 UTC (permalink / raw)
To: Jianbo Liu
Cc: dev, Thomas Monjalon, Bruce Richardson, huawei.xie, yuanhan.liu
On Tue, Jun 28, 2016 at 02:17:41PM +0800, Jianbo Liu wrote:
> On 27 June 2016 at 19:54, Jerin Jacob <jerin.jacob@caviumnetworks.com> wrote:
> > split out SSE instruction based virtio simple rx
> > implementation to a separate file
> >
> > Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> > ---
> > drivers/net/virtio/virtio_rxtx_simple.c | 166 +-------------------
> > drivers/net/virtio/virtio_rxtx_simple_sse.h | 225 ++++++++++++++++++++++++++++
> > 2 files changed, 226 insertions(+), 165 deletions(-)
> > create mode 100644 drivers/net/virtio/virtio_rxtx_simple_sse.h
> >
> I think it's better to move sse implementation to a C file,
> as Bruce pointed out at
> http://www.dpdk.org/ml/archives/dev/2016-April/037937.html
I can move to C file, That would call for further restructuring of the code
by Introducing a new file drivers/net/virtio/virtio_rxtx_simple.h and
moving all static inline functions of virtio_rxtx_simple.c so that
virtio_rxtx_simple_sse.c and virtio_rxtx_simple_neon.c can include it.
Huawei,Yuanhan,All,
Are you OK with above restructuring?
Jerin
>
> Jianbo
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH 2/4] virtio: introduce RTE_LIBRTE_VIRTIO_INC_VECTOR
2016-06-29 11:25 ` Thomas Monjalon
@ 2016-06-29 11:40 ` Jerin Jacob
2016-06-30 5:44 ` Yuanhan Liu
0 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-06-29 11:40 UTC (permalink / raw)
To: Thomas Monjalon
Cc: dev, bruce.richardson, jianbo.liu, huawei.xie, yuanhan.liu
On Wed, Jun 29, 2016 at 01:25:35PM +0200, Thomas Monjalon wrote:
> 2016-06-29 16:48, Jerin Jacob:
> > On Mon, Jun 27, 2016 at 04:59:42PM +0200, Thomas Monjalon wrote:
> > > 2016-06-27 20:18, Jerin Jacob:
> > > > On Mon, Jun 27, 2016 at 04:19:57PM +0200, Thomas Monjalon wrote:
> > > > > 2016-06-27 17:24, Jerin Jacob:
> > > > > > --- a/config/common_base
> > > > > > +++ b/config/common_base
> > > > > > @@ -267,6 +267,7 @@ CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
> > > > > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
> > > > > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DRIVER=n
> > > > > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n
> > > > > > +CONFIG_RTE_LIBRTE_VIRTIO_INC_VECTOR=y
> > > > >
> > > > > I don't remember what means INC_VECTOR?
> > > > > Why a config option is needed for vector implementations?
> > > >
> > > > I thought of adding additional configuration option(INC_VECTOR) _apart_ from
> > > > cpu flag based scheme in the patch because even though if a given platform
> > > > has cpu instruction support, in some platforms scalar version may
> > > > perform well wrt vector version(based on instruction latency, emulation required or not
> > > > etc). So a top level flag INC_VECTOR, can override the vector selection
> > > > for a given platform if required.
> > >
> > > Isn't it a runtime driver option needed to disable vector virtio?
> > >
> > > > Regarding INC_VECTOR(INC in vector configuration name, I have no idea, I
> > > > followed the existing flags)
> > > > $ grep "INC_VECTOR" config/common_base
> > > > CONFIG_RTE_IXGBE_INC_VECTOR=y
> > > > CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
> > > > CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y
> > >
> > > If the flag is really needed I would suggest VIRTIO_VECTOR.
> >
> > OK I will change to VIRTIO_VECTOR
>
> I would prefer a runtime option.
OK
The platform I test their was NO need for additional VIRTIO_VECTOR
configuration as NEON versions outperforms than scalar version.
I thought of adding this option to override for any platform if it need
to accommodate such platform differences NEON vs scalar versions.
I will change completely to run-time detection based on cpuflags for IA and ARM.
Any objections?
Jerin
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH 3/4] virtio: move SSE based Rx implementation to separate file
2016-06-29 11:27 ` Jerin Jacob
@ 2016-06-30 5:43 ` Yuanhan Liu
0 siblings, 0 replies; 44+ messages in thread
From: Yuanhan Liu @ 2016-06-30 5:43 UTC (permalink / raw)
To: Jerin Jacob
Cc: Jianbo Liu, dev, Thomas Monjalon, Bruce Richardson, huawei.xie
On Wed, Jun 29, 2016 at 04:57:46PM +0530, Jerin Jacob wrote:
> On Tue, Jun 28, 2016 at 02:17:41PM +0800, Jianbo Liu wrote:
> > On 27 June 2016 at 19:54, Jerin Jacob <jerin.jacob@caviumnetworks.com> wrote:
> > > split out SSE instruction based virtio simple rx
> > > implementation to a separate file
> > >
> > > Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> > > ---
> > > drivers/net/virtio/virtio_rxtx_simple.c | 166 +-------------------
> > > drivers/net/virtio/virtio_rxtx_simple_sse.h | 225 ++++++++++++++++++++++++++++
> > > 2 files changed, 226 insertions(+), 165 deletions(-)
> > > create mode 100644 drivers/net/virtio/virtio_rxtx_simple_sse.h
> > >
> > I think it's better to move sse implementation to a C file,
> > as Bruce pointed out at
> > http://www.dpdk.org/ml/archives/dev/2016-April/037937.html
>
> I can move to C file, That would call for further restructuring of the code
> by Introducing a new file drivers/net/virtio/virtio_rxtx_simple.h and
> moving all static inline functions of virtio_rxtx_simple.c so that
> virtio_rxtx_simple_sse.c and virtio_rxtx_simple_neon.c can include it.
>
> Huawei,Yuanhan,All,
>
> Are you OK with above restructuring?
Yes, I think that's better.
--yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH 2/4] virtio: introduce RTE_LIBRTE_VIRTIO_INC_VECTOR
2016-06-29 11:40 ` Jerin Jacob
@ 2016-06-30 5:44 ` Yuanhan Liu
0 siblings, 0 replies; 44+ messages in thread
From: Yuanhan Liu @ 2016-06-30 5:44 UTC (permalink / raw)
To: Jerin Jacob
Cc: Thomas Monjalon, dev, bruce.richardson, jianbo.liu, huawei.xie
On Wed, Jun 29, 2016 at 05:10:31PM +0530, Jerin Jacob wrote:
> On Wed, Jun 29, 2016 at 01:25:35PM +0200, Thomas Monjalon wrote:
> > 2016-06-29 16:48, Jerin Jacob:
> > > On Mon, Jun 27, 2016 at 04:59:42PM +0200, Thomas Monjalon wrote:
> > > > 2016-06-27 20:18, Jerin Jacob:
> > > > > On Mon, Jun 27, 2016 at 04:19:57PM +0200, Thomas Monjalon wrote:
> > > > > > 2016-06-27 17:24, Jerin Jacob:
> > > > > > > --- a/config/common_base
> > > > > > > +++ b/config/common_base
> > > > > > > @@ -267,6 +267,7 @@ CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
> > > > > > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
> > > > > > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DRIVER=n
> > > > > > > CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n
> > > > > > > +CONFIG_RTE_LIBRTE_VIRTIO_INC_VECTOR=y
> > > > > >
> > > > > > I don't remember what means INC_VECTOR?
> > > > > > Why a config option is needed for vector implementations?
> > > > >
> > > > > I thought of adding additional configuration option(INC_VECTOR) _apart_ from
> > > > > cpu flag based scheme in the patch because even though if a given platform
> > > > > has cpu instruction support, in some platforms scalar version may
> > > > > perform well wrt vector version(based on instruction latency, emulation required or not
> > > > > etc). So a top level flag INC_VECTOR, can override the vector selection
> > > > > for a given platform if required.
> > > >
> > > > Isn't it a runtime driver option needed to disable vector virtio?
> > > >
> > > > > Regarding INC_VECTOR(INC in vector configuration name, I have no idea, I
> > > > > followed the existing flags)
> > > > > $ grep "INC_VECTOR" config/common_base
> > > > > CONFIG_RTE_IXGBE_INC_VECTOR=y
> > > > > CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
> > > > > CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y
> > > >
> > > > If the flag is really needed I would suggest VIRTIO_VECTOR.
> > >
> > > OK I will change to VIRTIO_VECTOR
> >
> > I would prefer a runtime option.
+1
>
> OK
>
> The platform I test their was NO need for additional VIRTIO_VECTOR
> configuration as NEON versions outperforms than scalar version.
>
> I thought of adding this option to override for any platform if it need
> to accommodate such platform differences NEON vs scalar versions.
>
> I will change completely to run-time detection based on cpuflags for IA and ARM.
> Any objections?
Nope from me.
--yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* [dpdk-dev] From: Jerin Jacob <jerin.jacob@caviumnetworks.com>
2016-06-27 11:54 [dpdk-dev] [PATCH 0/4] Virtio NEON support for ARM Jerin Jacob
` (3 preceding siblings ...)
2016-06-27 11:54 ` [dpdk-dev] [PATCH 4/4] virtio: add neon support Jerin Jacob
@ 2016-07-01 11:16 ` Jerin Jacob
2016-07-01 11:16 ` [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup Jerin Jacob
` (3 more replies)
2016-07-01 11:19 ` [dpdk-dev] [PATCH v2 0/3] " Jerin Jacob
5 siblings, 4 replies; 44+ messages in thread
From: Jerin Jacob @ 2016-07-01 11:16 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie, yuanhan.liu
This patch-set includes,
1) General cleanup of compile time dependency.
2) made vector handler section based on run-time cpuflags
2) Added NEON support for optimized Rx handling
This patch-set is based on dpdk-next-virtio/master at aaaf0c005
v2:
- made vector handler selection based on run-time cpuflags (Suggested by Thomas)
- moved vector implementations to .c file instead of .h file(Suggested by Jianbo)
Jerin Jacob (3):
virtio: conditional compilation cleanup
virtio: move SSE based Rx implementation to separate file
virtio: add neon support
MAINTAINERS | 1 +
doc/guides/rel_notes/release_16_07.rst | 2 +
drivers/net/vision/Wakefully | 7 +-
drivers/net/virtio/virtio_pci.h | 1 +
drivers/net/virtio/virtio_rxtx.c | 62 +++---
drivers/net/virtio/virtio_rxtx.h | 3 +-
drivers/net/virtio/virtio_rxtx_simple.c | 269 ++-------------------------
drivers/net/virtio/virtio_rxtx_simple.h | 133 +++++++++++++
drivers/net/virtio/virtio_rxtx_simple_neon.c | 235 +++++++++++++++++++++++
drivers/net/virtio/virtio_rxtx_simple_sse.c | 222 ++++++++++++++++++++++
drivers/net/virtio/virtio_user_ethdev.c | 1 +
11 files changed, 646 insertions(+), 290 deletions(-)
create mode 100644 drivers/net/virtio/virtio_rxtx_simple.h
create mode 100644 drivers/net/virtio/virtio_rxtx_simple_neon.c
create mode 100644 drivers/net/virtio/virtio_rxtx_simple_sse.c
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup
2016-07-01 11:16 ` [dpdk-dev] From: Jerin Jacob <jerin.jacob@caviumnetworks.com> Jerin Jacob
@ 2016-07-01 11:16 ` Jerin Jacob
2016-07-04 7:36 ` Yuanhan Liu
2016-07-01 11:16 ` [dpdk-dev] [PATCH v2 2/3] virtio: move SSE based Rx implementation to separate file Jerin Jacob
` (2 subsequent siblings)
3 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-07-01 11:16 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie,
yuanhan.liu, Jerin Jacob
Removed unnecessary compile time dependency on "use_simple_rxtx".
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/virtio/Makefile | 3 ---
drivers/net/virtio/virtio_pci.h | 1 +
drivers/net/virtio/virtio_rxtx.c | 28 +++++++++-------------------
drivers/net/virtio/virtio_rxtx.h | 3 +--
drivers/net/virtio/virtio_rxtx_simple.c | 8 ++++++--
drivers/net/virtio/virtio_user_ethdev.c | 1 +
6 files changed, 18 insertions(+), 26 deletions(-)
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 3020b68..b9b0d8d 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -50,10 +50,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
-
-ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSSE3,$(CFLAGS)),RTE_MACHINE_CPUFLAG_SSSE3)
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
-endif
ifeq ($(CONFIG_RTE_VIRTIO_USER),y)
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index dd7693f..b8295a7 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -253,6 +253,7 @@ struct virtio_hw {
uint8_t use_msix;
uint8_t started;
uint8_t modern;
+ uint8_t use_simple_rxtx;
uint8_t mac_addr[ETHER_ADDR_LEN];
uint32_t notify_off_multiplier;
uint8_t *isr;
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index a27208e..63b53f7 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -67,10 +67,6 @@
#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
ETH_TXQ_FLAGS_NOOFFLOADS)
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
-static int use_simple_rxtx;
-#endif
-
static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
@@ -333,6 +329,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
*/
uint16_t i;
uint16_t desc_idx;
+ struct virtio_hw *hw = dev->data->dev_private;
PMD_INIT_FUNC_TRACE();
@@ -353,8 +350,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
nbufs = 0;
error = ENOSPC;
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- if (use_simple_rxtx) {
+ if (hw->use_simple_rxtx) {
for (desc_idx = 0; desc_idx < vq->vq_nentries;
desc_idx++) {
vq->vq_ring.avail->ring[desc_idx] = desc_idx;
@@ -362,7 +358,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
VRING_DESC_F_WRITE;
}
}
-#endif
+
memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
desc_idx++) {
@@ -378,12 +374,11 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
/******************************************
* Enqueue allocated buffers *
*******************************************/
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- if (use_simple_rxtx)
+ if (hw->use_simple_rxtx)
error = virtqueue_enqueue_recv_refill_simple(vq, m);
else
-#endif
error = virtqueue_enqueue_recv_refill(vq, m);
+
if (error) {
rte_pktmbuf_free(m);
break;
@@ -404,8 +399,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
struct virtqueue *vq = txvq->vq;
virtio_dev_vring_start(vq);
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- if (use_simple_rxtx) {
+ if (hw->use_simple_rxtx) {
uint16_t mid_idx = vq->vq_nentries >> 1;
for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
@@ -426,7 +420,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
desc_idx++)
vq->vq_ring.avail->ring[desc_idx] = desc_idx;
}
-#endif
+
VIRTQUEUE_DUMP(vq);
}
}
@@ -456,9 +450,7 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
dev->data->rx_queues[queue_idx] = rxvq;
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
virtio_rxq_vec_setup(rxvq);
-#endif
return 0;
}
@@ -494,9 +486,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
{
uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- struct virtio_hw *hw = dev->data->dev_private;
-#endif
struct virtnet_tx *txvq;
struct virtqueue *vq;
uint16_t tx_free_thresh;
@@ -511,13 +500,14 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
}
#ifdef RTE_MACHINE_CPUFLAG_SSSE3
+ struct virtio_hw *hw = dev->data->dev_private;
/* Use simple rx/tx func if single segment and no offloads */
if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
!vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
PMD_INIT_LOG(INFO, "Using simple rx/tx path");
dev->tx_pkt_burst = virtio_xmit_pkts_simple;
dev->rx_pkt_burst = virtio_recv_pkts_vec;
- use_simple_rxtx = 1;
+ hw->use_simple_rxtx = 1;
}
#endif
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 058b56a..28f82d6 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -86,10 +86,9 @@ struct virtnet_ctl {
const struct rte_memzone *mz; /**< mem zone to populate RX ring. */
};
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct rte_mbuf *m);
-#endif
+
#endif /* _VIRTIO_RXTX_H_ */
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index 242ad90..67430da 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -37,8 +37,6 @@
#include <string.h>
#include <errno.h>
-#include <tmmintrin.h>
-
#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memzone.h>
@@ -131,6 +129,10 @@ virtio_rxq_rearm_vec(struct virtnet_rx *rxvq)
vq_update_avail_idx(vq);
}
+#ifdef RTE_MACHINE_CPUFLAG_SSSE3
+
+#include <tmmintrin.h>
+
/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
*
* This routine is for non-mergeable RX, one desc for each guest buffer.
@@ -293,6 +295,8 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
return nb_pkts_received;
}
+#endif
+
#define VIRTIO_TX_FREE_THRESH 32
#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
#define VIRTIO_TX_FREE_NR 32
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 5ab2471..bef8130 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -303,6 +303,7 @@ virtio_user_eth_dev_alloc(const char *name)
hw->vtpci_ops = &virtio_user_ops;
hw->use_msix = 0;
hw->modern = 0;
+ hw->use_simple_rxtx = 0;
hw->virtio_user_dev = dev;
data->dev_private = hw;
data->numa_node = SOCKET_ID_ANY;
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* [dpdk-dev] [PATCH v2 2/3] virtio: move SSE based Rx implementation to separate file
2016-07-01 11:16 ` [dpdk-dev] From: Jerin Jacob <jerin.jacob@caviumnetworks.com> Jerin Jacob
2016-07-01 11:16 ` [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup Jerin Jacob
@ 2016-07-01 11:16 ` Jerin Jacob
2016-07-04 7:42 ` Yuanhan Liu
2016-07-01 11:16 ` [dpdk-dev] [PATCH v2 3/3] virtio: add neon support Jerin Jacob
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 0/4] Virtio NEON support for ARM Jerin Jacob
3 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-07-01 11:16 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie,
yuanhan.liu, Jerin Jacob
* Introduced cpuflag based run-time detection to
select the SSE based simple Rx handler
* Split out SSE instruction based virtio simple Rx
implementation to a separate file
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/virtio/Makefile | 4 +
drivers/net/virtio/virtio_rxtx.c | 35 ++--
drivers/net/virtio/virtio_rxtx_simple.c | 273 ++--------------------------
drivers/net/virtio/virtio_rxtx_simple.h | 133 ++++++++++++++
drivers/net/virtio/virtio_rxtx_simple_sse.c | 222 ++++++++++++++++++++++
5 files changed, 394 insertions(+), 273 deletions(-)
create mode 100644 drivers/net/virtio/virtio_rxtx_simple.h
create mode 100644 drivers/net/virtio/virtio_rxtx_simple_sse.c
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index b9b0d8d..c4103b7 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -52,6 +52,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
+ifeq ($(CONFIG_RTE_ARCH_X86),y)
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple_sse.c
+endif
+
ifeq ($(CONFIG_RTE_VIRTIO_USER),y)
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 63b53f7..a4d4a57 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -50,6 +50,7 @@
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
+#include <rte_cpuflags.h>
#include "virtio_logs.h"
#include "virtio_ethdev.h"
@@ -470,6 +471,28 @@ virtio_dev_rx_queue_release(void *rxq)
rte_memzone_free(mz);
}
+static void
+virtio_update_rxtx_handler(struct rte_eth_dev *dev,
+ const struct rte_eth_txconf *tx_conf)
+{
+ uint8_t use_simple_rxtx = 0;
+ struct virtio_hw *hw = dev->data->dev_private;
+
+#if defined RTE_ARCH_X86
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3))
+ use_simple_rxtx = 1;
+#endif
+ /* Use simple rx/tx func if single segment and no offloads */
+ if (use_simple_rxtx &&
+ (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
+ !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
+ PMD_INIT_LOG(INFO, "Using simple rx/tx path");
+ dev->tx_pkt_burst = virtio_xmit_pkts_simple;
+ dev->rx_pkt_burst = virtio_recv_pkts_vec;
+ hw->use_simple_rxtx = use_simple_rxtx;
+ }
+}
+
/*
* struct rte_eth_dev *dev: Used to update dev
* uint16_t nb_desc: Defaults to values read from config space
@@ -499,17 +522,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
return -EINVAL;
}
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- struct virtio_hw *hw = dev->data->dev_private;
- /* Use simple rx/tx func if single segment and no offloads */
- if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
- !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
- PMD_INIT_LOG(INFO, "Using simple rx/tx path");
- dev->tx_pkt_burst = virtio_xmit_pkts_simple;
- dev->rx_pkt_burst = virtio_recv_pkts_vec;
- hw->use_simple_rxtx = 1;
- }
-#endif
+ virtio_update_rxtx_handler(dev, tx_conf);
ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
nb_desc, socket_id, (void **)&txvq);
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index 67430da..485ddce 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -51,14 +51,7 @@
#include <rte_errno.h>
#include <rte_byteorder.h>
-#include "virtio_logs.h"
-#include "virtio_ethdev.h"
-#include "virtqueue.h"
-#include "virtio_rxtx.h"
-
-#define RTE_VIRTIO_VPMD_RX_BURST 32
-#define RTE_VIRTIO_DESC_PER_LOOP 8
-#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+#include "virtio_rxtx_simple.h"
#ifndef __INTEL_COMPILER
#pragma GCC diagnostic ignored "-Wcast-qual"
@@ -89,260 +82,6 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
return 0;
}
-static inline void
-virtio_rxq_rearm_vec(struct virtnet_rx *rxvq)
-{
- int i;
- uint16_t desc_idx;
- struct rte_mbuf **sw_ring;
- struct vring_desc *start_dp;
- int ret;
- struct virtqueue *vq = rxvq->vq;
-
- desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
- sw_ring = &vq->sw_ring[desc_idx];
- start_dp = &vq->vq_ring.desc[desc_idx];
-
- ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
- RTE_VIRTIO_VPMD_RX_REARM_THRESH);
- if (unlikely(ret)) {
- rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed +=
- RTE_VIRTIO_VPMD_RX_REARM_THRESH;
- return;
- }
-
- for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) {
- uintptr_t p;
-
- p = (uintptr_t)&sw_ring[i]->rearm_data;
- *(uint64_t *)p = rxvq->mbuf_initializer;
-
- start_dp[i].addr =
- MBUF_DATA_DMA_ADDR(sw_ring[i], vq->offset) -
- vq->hw->vtnet_hdr_size;
- start_dp[i].len = sw_ring[i]->buf_len -
- RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size;
- }
-
- vq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
- vq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
- vq_update_avail_idx(vq);
-}
-
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
-
-#include <tmmintrin.h>
-
-/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
- *
- * This routine is for non-mergeable RX, one desc for each guest buffer.
- * This routine is based on the RX ring layout optimization. Each entry in the
- * avail ring points to the desc with the same index in the desc ring and this
- * will never be changed in the driver.
- *
- * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
- */
-uint16_t
-virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t nb_pkts)
-{
- struct virtnet_rx *rxvq = rx_queue;
- struct virtqueue *vq = rxvq->vq;
- uint16_t nb_used;
- uint16_t desc_idx;
- struct vring_used_elem *rused;
- struct rte_mbuf **sw_ring;
- struct rte_mbuf **sw_ring_end;
- uint16_t nb_pkts_received;
- __m128i shuf_msk1, shuf_msk2, len_adjust;
-
- shuf_msk1 = _mm_set_epi8(
- 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, /* vlan tci */
- 5, 4, /* dat len */
- 0xFF, 0xFF, 5, 4, /* pkt len */
- 0xFF, 0xFF, 0xFF, 0xFF /* packet type */
-
- );
-
- shuf_msk2 = _mm_set_epi8(
- 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, /* vlan tci */
- 13, 12, /* dat len */
- 0xFF, 0xFF, 13, 12, /* pkt len */
- 0xFF, 0xFF, 0xFF, 0xFF /* packet type */
- );
-
- /* Subtract the header length.
- * In which case do we need the header length in used->len ?
- */
- len_adjust = _mm_set_epi16(
- 0, 0,
- 0,
- (uint16_t)-vq->hw->vtnet_hdr_size,
- 0, (uint16_t)-vq->hw->vtnet_hdr_size,
- 0, 0);
-
- if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
- return 0;
-
- nb_used = VIRTQUEUE_NUSED(vq);
-
- rte_compiler_barrier();
-
- if (unlikely(nb_used == 0))
- return 0;
-
- nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
- nb_used = RTE_MIN(nb_used, nb_pkts);
-
- desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
- rused = &vq->vq_ring.used->ring[desc_idx];
- sw_ring = &vq->sw_ring[desc_idx];
- sw_ring_end = &vq->sw_ring[vq->vq_nentries];
-
- _mm_prefetch((const void *)rused, _MM_HINT_T0);
-
- if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
- virtio_rxq_rearm_vec(rxvq);
- if (unlikely(virtqueue_kick_prepare(vq)))
- virtqueue_notify(vq);
- }
-
- for (nb_pkts_received = 0;
- nb_pkts_received < nb_used;) {
- __m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
- __m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
- __m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
-
- mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0));
- desc[0] = _mm_loadu_si128((__m128i *)(rused + 0));
- _mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]);
-
- mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2));
- desc[1] = _mm_loadu_si128((__m128i *)(rused + 2));
- _mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]);
-
- mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4));
- desc[2] = _mm_loadu_si128((__m128i *)(rused + 4));
- _mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]);
-
- mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6));
- desc[3] = _mm_loadu_si128((__m128i *)(rused + 6));
- _mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]);
-
- pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2);
- pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1);
- pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust);
- pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust);
- _mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1,
- pkt_mb[1]);
- _mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1,
- pkt_mb[0]);
-
- pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2);
- pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1);
- pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust);
- pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust);
- _mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1,
- pkt_mb[3]);
- _mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1,
- pkt_mb[2]);
-
- pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2);
- pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1);
- pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust);
- pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust);
- _mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1,
- pkt_mb[5]);
- _mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1,
- pkt_mb[4]);
-
- pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2);
- pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1);
- pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust);
- pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust);
- _mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1,
- pkt_mb[7]);
- _mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1,
- pkt_mb[6]);
-
- if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
- if (sw_ring + nb_used <= sw_ring_end)
- nb_pkts_received += nb_used;
- else
- nb_pkts_received += sw_ring_end - sw_ring;
- break;
- } else {
- if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
- sw_ring_end)) {
- nb_pkts_received += sw_ring_end - sw_ring;
- break;
- } else {
- nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
-
- rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
- sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
- rused += RTE_VIRTIO_DESC_PER_LOOP;
- nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
- }
- }
- }
-
- vq->vq_used_cons_idx += nb_pkts_received;
- vq->vq_free_cnt += nb_pkts_received;
- rxvq->stats.packets += nb_pkts_received;
- return nb_pkts_received;
-}
-
-#endif
-
-#define VIRTIO_TX_FREE_THRESH 32
-#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
-#define VIRTIO_TX_FREE_NR 32
-/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift */
-static inline void
-virtio_xmit_cleanup(struct virtqueue *vq)
-{
- uint16_t i, desc_idx;
- int nb_free = 0;
- struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];
-
- desc_idx = (uint16_t)(vq->vq_used_cons_idx &
- ((vq->vq_nentries >> 1) - 1));
- m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
- m = __rte_pktmbuf_prefree_seg(m);
- if (likely(m != NULL)) {
- free[0] = m;
- nb_free = 1;
- for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
- m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
- m = __rte_pktmbuf_prefree_seg(m);
- if (likely(m != NULL)) {
- if (likely(m->pool == free[0]->pool))
- free[nb_free++] = m;
- else {
- rte_mempool_put_bulk(free[0]->pool,
- (void **)free, nb_free);
- free[0] = m;
- nb_free = 1;
- }
- }
- }
- rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
- } else {
- for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
- m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
- m = __rte_pktmbuf_prefree_seg(m);
- if (m != NULL)
- rte_mempool_put(m->pool, m);
- }
- }
-
- vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
- vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
-}
-
uint16_t
virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
@@ -423,3 +162,13 @@ virtio_rxq_vec_setup(struct virtnet_rx *rxq)
return 0;
}
+
+/* Stub for linkage when arch specific implementation is not available */
+uint16_t __attribute__((weak))
+virtio_recv_pkts_vec(void *rx_queue __rte_unused,
+ struct rte_mbuf **rx_pkts __rte_unused,
+ uint16_t nb_pkts __rte_unused)
+{
+ rte_panic("Wrong weak function linked by linker\n");
+ return 0;
+}
diff --git a/drivers/net/virtio/virtio_rxtx_simple.h b/drivers/net/virtio/virtio_rxtx_simple.h
new file mode 100644
index 0000000..8cb43c0
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple.h
@@ -0,0 +1,133 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_RXTX_SIMPLE_H_
+#define _VIRTIO_RXTX_SIMPLE_H_
+
+#include <stdint.h>
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+#include "virtio_rxtx.h"
+
+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
+static inline void
+virtio_rxq_rearm_vec(struct virtnet_rx *rxvq)
+{
+ int i;
+ uint16_t desc_idx;
+ struct rte_mbuf **sw_ring;
+ struct vring_desc *start_dp;
+ int ret;
+ struct virtqueue *vq = rxvq->vq;
+
+ desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
+ sw_ring = &vq->sw_ring[desc_idx];
+ start_dp = &vq->vq_ring.desc[desc_idx];
+
+ ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
+ RTE_VIRTIO_VPMD_RX_REARM_THRESH);
+ if (unlikely(ret)) {
+ rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed +=
+ RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+ return;
+ }
+
+ for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) {
+ uintptr_t p;
+
+ p = (uintptr_t)&sw_ring[i]->rearm_data;
+ *(uint64_t *)p = rxvq->mbuf_initializer;
+
+ start_dp[i].addr =
+ MBUF_DATA_DMA_ADDR(sw_ring[i], vq->offset) -
+ vq->hw->vtnet_hdr_size;
+ start_dp[i].len = sw_ring[i]->buf_len -
+ RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size;
+ }
+
+ vq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+ vq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+ vq_update_avail_idx(vq);
+}
+
+#define VIRTIO_TX_FREE_THRESH 32
+#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
+#define VIRTIO_TX_FREE_NR 32
+/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift */
+static inline void
+virtio_xmit_cleanup(struct virtqueue *vq)
+{
+ uint16_t i, desc_idx;
+ int nb_free = 0;
+ struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];
+
+ desc_idx = (uint16_t)(vq->vq_used_cons_idx &
+ ((vq->vq_nentries >> 1) - 1));
+ m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+ m = __rte_pktmbuf_prefree_seg(m);
+ if (likely(m != NULL)) {
+ free[0] = m;
+ nb_free = 1;
+ for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
+ m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+ m = __rte_pktmbuf_prefree_seg(m);
+ if (likely(m != NULL)) {
+ if (likely(m->pool == free[0]->pool))
+ free[nb_free++] = m;
+ else {
+ rte_mempool_put_bulk(free[0]->pool,
+ (void **)free, nb_free);
+ free[0] = m;
+ nb_free = 1;
+ }
+ }
+ }
+ rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+ } else {
+ for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
+ m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+ m = __rte_pktmbuf_prefree_seg(m);
+ if (m != NULL)
+ rte_mempool_put(m->pool, m);
+ }
+ }
+
+ vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
+ vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
+}
+
+#endif /* _VIRTIO_RXTX_SIMPLE_H_ */
diff --git a/drivers/net/virtio/virtio_rxtx_simple_sse.c b/drivers/net/virtio/virtio_rxtx_simple_sse.c
new file mode 100644
index 0000000..39000e8
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple_sse.c
@@ -0,0 +1,222 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <tmmintrin.h>
+
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_cycles.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_prefetch.h>
+#include <rte_string_fns.h>
+
+#include "virtio_rxtx_simple.h"
+
+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
+/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
+ *
+ * This routine is for non-mergeable RX, one desc for each guest buffer.
+ * This routine is based on the RX ring layout optimization. Each entry in the
+ * avail ring points to the desc with the same index in the desc ring and this
+ * will never be changed in the driver.
+ *
+ * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
+ */
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct virtnet_rx *rxvq = rx_queue;
+ struct virtqueue *vq = rxvq->vq;
+ uint16_t nb_used;
+ uint16_t desc_idx;
+ struct vring_used_elem *rused;
+ struct rte_mbuf **sw_ring;
+ struct rte_mbuf **sw_ring_end;
+ uint16_t nb_pkts_received;
+ __m128i shuf_msk1, shuf_msk2, len_adjust;
+
+ shuf_msk1 = _mm_set_epi8(
+ 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, /* vlan tci */
+ 5, 4, /* dat len */
+ 0xFF, 0xFF, 5, 4, /* pkt len */
+ 0xFF, 0xFF, 0xFF, 0xFF /* packet type */
+
+ );
+
+ shuf_msk2 = _mm_set_epi8(
+ 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, /* vlan tci */
+ 13, 12, /* dat len */
+ 0xFF, 0xFF, 13, 12, /* pkt len */
+ 0xFF, 0xFF, 0xFF, 0xFF /* packet type */
+ );
+
+ /* Subtract the header length.
+ * In which case do we need the header length in used->len ?
+ */
+ len_adjust = _mm_set_epi16(
+ 0, 0,
+ 0,
+ (uint16_t)-vq->hw->vtnet_hdr_size,
+ 0, (uint16_t)-vq->hw->vtnet_hdr_size,
+ 0, 0);
+
+ if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
+ return 0;
+
+ nb_used = VIRTQUEUE_NUSED(vq);
+
+ rte_compiler_barrier();
+
+ if (unlikely(nb_used == 0))
+ return 0;
+
+ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
+ nb_used = RTE_MIN(nb_used, nb_pkts);
+
+ desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+ rused = &vq->vq_ring.used->ring[desc_idx];
+ sw_ring = &vq->sw_ring[desc_idx];
+ sw_ring_end = &vq->sw_ring[vq->vq_nentries];
+
+ _mm_prefetch((const void *)rused, _MM_HINT_T0);
+
+ if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
+ virtio_rxq_rearm_vec(rxvq);
+ if (unlikely(virtqueue_kick_prepare(vq)))
+ virtqueue_notify(vq);
+ }
+
+ for (nb_pkts_received = 0;
+ nb_pkts_received < nb_used;) {
+ __m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
+ __m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
+ __m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
+
+ mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0));
+ desc[0] = _mm_loadu_si128((__m128i *)(rused + 0));
+ _mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]);
+
+ mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2));
+ desc[1] = _mm_loadu_si128((__m128i *)(rused + 2));
+ _mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]);
+
+ mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4));
+ desc[2] = _mm_loadu_si128((__m128i *)(rused + 4));
+ _mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]);
+
+ mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6));
+ desc[3] = _mm_loadu_si128((__m128i *)(rused + 6));
+ _mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]);
+
+ pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2);
+ pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1);
+ pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust);
+ pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust);
+ _mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1,
+ pkt_mb[1]);
+ _mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1,
+ pkt_mb[0]);
+
+ pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2);
+ pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1);
+ pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust);
+ pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust);
+ _mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1,
+ pkt_mb[3]);
+ _mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1,
+ pkt_mb[2]);
+
+ pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2);
+ pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1);
+ pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust);
+ pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust);
+ _mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1,
+ pkt_mb[5]);
+ _mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1,
+ pkt_mb[4]);
+
+ pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2);
+ pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1);
+ pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust);
+ pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust);
+ _mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1,
+ pkt_mb[7]);
+ _mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1,
+ pkt_mb[6]);
+
+ if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
+ if (sw_ring + nb_used <= sw_ring_end)
+ nb_pkts_received += nb_used;
+ else
+ nb_pkts_received += sw_ring_end - sw_ring;
+ break;
+ } else {
+ if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
+ sw_ring_end)) {
+ nb_pkts_received += sw_ring_end - sw_ring;
+ break;
+ } else {
+ nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
+
+ rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
+ sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
+ rused += RTE_VIRTIO_DESC_PER_LOOP;
+ nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
+ }
+ }
+ }
+
+ vq->vq_used_cons_idx += nb_pkts_received;
+ vq->vq_free_cnt += nb_pkts_received;
+ rxvq->stats.packets += nb_pkts_received;
+ return nb_pkts_received;
+}
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* [dpdk-dev] [PATCH v2 3/3] virtio: add neon support
2016-07-01 11:16 ` [dpdk-dev] From: Jerin Jacob <jerin.jacob@caviumnetworks.com> Jerin Jacob
2016-07-01 11:16 ` [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup Jerin Jacob
2016-07-01 11:16 ` [dpdk-dev] [PATCH v2 2/3] virtio: move SSE based Rx implementation to separate file Jerin Jacob
@ 2016-07-01 11:16 ` Jerin Jacob
2016-07-04 7:53 ` Yuanhan Liu
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 0/4] Virtio NEON support for ARM Jerin Jacob
3 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-07-01 11:16 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie,
yuanhan.liu, Jerin Jacob
Added neon based Rx vector implementation.
Selection of the new handler based neon availability at runtime.
Updated the release notes and MAINTAINERS file.
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
MAINTAINERS | 1 +
doc/guides/rel_notes/release_16_07.rst | 2 +
drivers/net/virtio/Makefile | 2 +
drivers/net/virtio/virtio_rxtx.c | 3 +
drivers/net/virtio/virtio_rxtx_simple_neon.c | 235 +++++++++++++++++++++++++++
5 files changed, 243 insertions(+)
create mode 100644 drivers/net/virtio/virtio_rxtx_simple_neon.c
diff --git a/MAINTAINERS b/MAINTAINERS
index a59191e..ab04cee 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -143,6 +143,7 @@ F: lib/librte_acl/acl_run_neon.*
F: lib/librte_lpm/rte_lpm_neon.h
F: lib/librte_hash/rte*_arm64.h
F: drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
+F: drivers/net/virtio/virtio_rxtx_simple_neon.c
EZchip TILE-Gx
M: Zhigang Lu <zlu@ezchip.com>
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 9e2a817..3a5add5 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -174,6 +174,8 @@ New Features
section of the "Network Interface Controller Drivers" document.
+* **Virtio NEON support for ARM.**
+
Resolved Issues
---------------
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index c4103b7..97972a6 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -54,6 +54,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
ifeq ($(CONFIG_RTE_ARCH_X86),y)
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple_sse.c
+else ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple_neon.c
endif
ifeq ($(CONFIG_RTE_VIRTIO_USER),y)
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index a4d4a57..19d1742 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -481,6 +481,9 @@ virtio_update_rxtx_handler(struct rte_eth_dev *dev,
#if defined RTE_ARCH_X86
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3))
use_simple_rxtx = 1;
+#elif defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
+ use_simple_rxtx = 1;
#endif
/* Use simple rx/tx func if single segment and no offloads */
if (use_simple_rxtx &&
diff --git a/drivers/net/virtio/virtio_rxtx_simple_neon.c b/drivers/net/virtio/virtio_rxtx_simple_neon.c
new file mode 100644
index 0000000..793eefb
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple_neon.c
@@ -0,0 +1,235 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium networks Ltd. 2016
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_cycles.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_prefetch.h>
+#include <rte_string_fns.h>
+#include <rte_vect.h>
+
+#include "virtio_rxtx_simple.h"
+
+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
+/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
+ *
+ * This routine is for non-mergeable RX, one desc for each guest buffer.
+ * This routine is based on the RX ring layout optimization. Each entry in the
+ * avail ring points to the desc with the same index in the desc ring and this
+ * will never be changed in the driver.
+ *
+ * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
+ */
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct virtnet_rx *rxvq = rx_queue;
+ struct virtqueue *vq = rxvq->vq;
+ uint16_t nb_used;
+ uint16_t desc_idx;
+ struct vring_used_elem *rused;
+ struct rte_mbuf **sw_ring;
+ struct rte_mbuf **sw_ring_end;
+ uint16_t nb_pkts_received;
+
+ uint8x16_t shuf_msk1 = {
+ 0xFF, 0xFF, 0xFF, 0xFF, /* packet type */
+ 4, 5, 0xFF, 0xFF, /* pkt len */
+ 4, 5, /* dat len */
+ 0xFF, 0xFF, /* vlan tci */
+ 0xFF, 0xFF, 0xFF, 0xFF
+ };
+
+ uint8x16_t shuf_msk2 = {
+ 0xFF, 0xFF, 0xFF, 0xFF, /* packet type */
+ 12, 13, 0xFF, 0xFF, /* pkt len */
+ 12, 13, /* dat len */
+ 0xFF, 0xFF, /* vlan tci */
+ 0xFF, 0xFF, 0xFF, 0xFF
+ };
+
+ /* Subtract the header length.
+ * In which case do we need the header length in used->len ?
+ */
+ uint16x8_t len_adjust = {
+ 0, 0,
+ (uint16_t)vq->hw->vtnet_hdr_size, 0,
+ (uint16_t)vq->hw->vtnet_hdr_size,
+ 0,
+ 0, 0
+ };
+
+ if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
+ return 0;
+
+ nb_used = VIRTQUEUE_NUSED(vq);
+
+ rte_rmb();
+
+ if (unlikely(nb_used == 0))
+ return 0;
+
+ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
+ nb_used = RTE_MIN(nb_used, nb_pkts);
+
+ desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+ rused = &vq->vq_ring.used->ring[desc_idx];
+ sw_ring = &vq->sw_ring[desc_idx];
+ sw_ring_end = &vq->sw_ring[vq->vq_nentries];
+
+ rte_prefetch_non_temporal(rused);
+
+ if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
+ virtio_rxq_rearm_vec(rxvq);
+ if (unlikely(virtqueue_kick_prepare(vq)))
+ virtqueue_notify(vq);
+ }
+
+ for (nb_pkts_received = 0;
+ nb_pkts_received < nb_used;) {
+ uint64x2_t desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
+ uint64x2_t mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
+ uint64x2_t pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
+
+ mbp[0] = vld1q_u64((uint64_t *)(sw_ring + 0));
+ desc[0] = vld1q_u64((uint64_t *)(rused + 0));
+ vst1q_u64((uint64_t *)&rx_pkts[0], mbp[0]);
+
+ mbp[1] = vld1q_u64((uint64_t *)(sw_ring + 2));
+ desc[1] = vld1q_u64((uint64_t *)(rused + 2));
+ vst1q_u64((uint64_t *)&rx_pkts[2], mbp[1]);
+
+ mbp[2] = vld1q_u64((uint64_t *)(sw_ring + 4));
+ desc[2] = vld1q_u64((uint64_t *)(rused + 4));
+ vst1q_u64((uint64_t *)&rx_pkts[4], mbp[2]);
+
+ mbp[3] = vld1q_u64((uint64_t *)(sw_ring + 6));
+ desc[3] = vld1q_u64((uint64_t *)(rused + 6));
+ vst1q_u64((uint64_t *)&rx_pkts[6], mbp[3]);
+
+ pkt_mb[1] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[0]), shuf_msk2));
+ pkt_mb[0] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[0]), shuf_msk1));
+ pkt_mb[1] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[1]), len_adjust));
+ pkt_mb[0] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[0]), len_adjust));
+ vst1q_u64((void *)&rx_pkts[1]->rx_descriptor_fields1,
+ pkt_mb[1]);
+ vst1q_u64((void *)&rx_pkts[0]->rx_descriptor_fields1,
+ pkt_mb[0]);
+
+ pkt_mb[3] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[1]), shuf_msk2));
+ pkt_mb[2] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[1]), shuf_msk1));
+ pkt_mb[3] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[3]), len_adjust));
+ pkt_mb[2] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[2]), len_adjust));
+ vst1q_u64((void *)&rx_pkts[3]->rx_descriptor_fields1,
+ pkt_mb[3]);
+ vst1q_u64((void *)&rx_pkts[2]->rx_descriptor_fields1,
+ pkt_mb[2]);
+
+ pkt_mb[5] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[2]), shuf_msk2));
+ pkt_mb[4] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[2]), shuf_msk1));
+ pkt_mb[5] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[5]), len_adjust));
+ pkt_mb[4] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[4]), len_adjust));
+ vst1q_u64((void *)&rx_pkts[5]->rx_descriptor_fields1,
+ pkt_mb[5]);
+ vst1q_u64((void *)&rx_pkts[4]->rx_descriptor_fields1,
+ pkt_mb[4]);
+
+ pkt_mb[7] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[3]), shuf_msk2));
+ pkt_mb[6] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[3]), shuf_msk1));
+ pkt_mb[7] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[7]), len_adjust));
+ pkt_mb[6] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[6]), len_adjust));
+ vst1q_u64((void *)&rx_pkts[7]->rx_descriptor_fields1,
+ pkt_mb[7]);
+ vst1q_u64((void *)&rx_pkts[6]->rx_descriptor_fields1,
+ pkt_mb[6]);
+
+ if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
+ if (sw_ring + nb_used <= sw_ring_end)
+ nb_pkts_received += nb_used;
+ else
+ nb_pkts_received += sw_ring_end - sw_ring;
+ break;
+ } else {
+ if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
+ sw_ring_end)) {
+ nb_pkts_received += sw_ring_end - sw_ring;
+ break;
+ } else {
+ nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
+
+ rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
+ sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
+ rused += RTE_VIRTIO_DESC_PER_LOOP;
+ nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
+ }
+ }
+ }
+
+ vq->vq_used_cons_idx += nb_pkts_received;
+ vq->vq_free_cnt += nb_pkts_received;
+ rxvq->stats.packets += nb_pkts_received;
+ return nb_pkts_received;
+}
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* [dpdk-dev] [PATCH v2 0/3] Virtio NEON support for ARM
2016-06-27 11:54 [dpdk-dev] [PATCH 0/4] Virtio NEON support for ARM Jerin Jacob
` (4 preceding siblings ...)
2016-07-01 11:16 ` [dpdk-dev] From: Jerin Jacob <jerin.jacob@caviumnetworks.com> Jerin Jacob
@ 2016-07-01 11:19 ` Jerin Jacob
5 siblings, 0 replies; 44+ messages in thread
From: Jerin Jacob @ 2016-07-01 11:19 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie,
yuanhan.liu, Jerin Jacob
This patch-set includes,
1) General cleanup of compile time dependency.
2) made vector handler section based on run-time cpuflags
2) Added NEON support for optimized Rx handling
This patch-set is based on dpdk-next-virtio/master
v2:
- made vector handler selection based on run-time cpuflags (Suggested by Thomas)
- moved vector implementations to .c file instead of .h file(Suggested by Jianbo)
Jerin Jacob (3):
virtio: conditional compilation cleanup
virtio: move SSE based Rx implementation to separate file
virtio: add neon support
MAINTAINERS | 1 +
doc/guides/rel_notes/release_16_07.rst | 2 +
drivers/net/vision/Wakefully | 7 +-
drivers/net/virtio/virtio_pci.h | 1 +
drivers/net/virtio/virtio_rxtx.c | 62 +++---
drivers/net/virtio/virtio_rxtx.h | 3 +-
drivers/net/virtio/virtio_rxtx_simple.c | 269 ++-------------------------
drivers/net/virtio/virtio_rxtx_simple.h | 133 +++++++++++++
drivers/net/virtio/virtio_rxtx_simple_neon.c | 235 +++++++++++++++++++++++
drivers/net/virtio/virtio_rxtx_simple_sse.c | 222 ++++++++++++++++++++++
drivers/net/virtio/virtio_user_ethdev.c | 1 +
11 files changed, 646 insertions(+), 290 deletions(-)
create mode 100644 drivers/net/virtio/virtio_rxtx_simple.h
create mode 100644 drivers/net/virtio/virtio_rxtx_simple_neon.c
create mode 100644 drivers/net/virtio/virtio_rxtx_simple_sse.c
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup
2016-07-01 11:16 ` [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup Jerin Jacob
@ 2016-07-04 7:36 ` Yuanhan Liu
2016-07-04 8:36 ` Jerin Jacob
0 siblings, 1 reply; 44+ messages in thread
From: Yuanhan Liu @ 2016-07-04 7:36 UTC (permalink / raw)
To: Jerin Jacob
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Fri, Jul 01, 2016 at 04:46:36PM +0530, Jerin Jacob wrote:
> @@ -494,9 +486,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> {
> uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
>
> -#ifdef RTE_MACHINE_CPUFLAG_SSSE3
> - struct virtio_hw *hw = dev->data->dev_private;
> -#endif
> struct virtnet_tx *txvq;
> struct virtqueue *vq;
> uint16_t tx_free_thresh;
> @@ -511,13 +500,14 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> }
>
> #ifdef RTE_MACHINE_CPUFLAG_SSSE3
> + struct virtio_hw *hw = dev->data->dev_private;
I'd suggest to move above declaration to ...
> /* Use simple rx/tx func if single segment and no offloads */
> if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
> !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
here: we should try to avoid declaring vars in the middle of a code block.
--yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/3] virtio: move SSE based Rx implementation to separate file
2016-07-01 11:16 ` [dpdk-dev] [PATCH v2 2/3] virtio: move SSE based Rx implementation to separate file Jerin Jacob
@ 2016-07-04 7:42 ` Yuanhan Liu
2016-07-04 8:38 ` Jerin Jacob
0 siblings, 1 reply; 44+ messages in thread
From: Yuanhan Liu @ 2016-07-04 7:42 UTC (permalink / raw)
To: Jerin Jacob
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Fri, Jul 01, 2016 at 04:46:37PM +0530, Jerin Jacob wrote:
> * Introduced cpuflag based run-time detection to
> select the SSE based simple Rx handler
> * Split out SSE instruction based virtio simple Rx
> implementation to a separate file
As your commit log says, it does two things, therefore, I'd suggest you
to do it in two patches, with each just does one thing as you mentioned.
> +static void
> +virtio_update_rxtx_handler(struct rte_eth_dev *dev,
> + const struct rte_eth_txconf *tx_conf)
> +{
> + uint8_t use_simple_rxtx = 0;
> + struct virtio_hw *hw = dev->data->dev_private;
> +
> +#if defined RTE_ARCH_X86
> + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3))
> + use_simple_rxtx = 1;
> +#endif
> + /* Use simple rx/tx func if single segment and no offloads */
> + if (use_simple_rxtx &&
> + (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
> + !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
The alignment here is not consistent, something like following is what
I'd suggest:
if (use_simple_rxtx &&
(tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
!vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
--yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/3] virtio: add neon support
2016-07-01 11:16 ` [dpdk-dev] [PATCH v2 3/3] virtio: add neon support Jerin Jacob
@ 2016-07-04 7:53 ` Yuanhan Liu
2016-07-04 8:55 ` Jerin Jacob
0 siblings, 1 reply; 44+ messages in thread
From: Yuanhan Liu @ 2016-07-04 7:53 UTC (permalink / raw)
To: Jerin Jacob
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Fri, Jul 01, 2016 at 04:46:38PM +0530, Jerin Jacob wrote:
> Added neon based Rx vector implementation.
> Selection of the new handler based neon availability at runtime.
> Updated the release notes and MAINTAINERS file.
>
> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> ---
> MAINTAINERS | 1 +
> doc/guides/rel_notes/release_16_07.rst | 2 +
> drivers/net/virtio/Makefile | 2 +
> drivers/net/virtio/virtio_rxtx.c | 3 +
> drivers/net/virtio/virtio_rxtx_simple_neon.c | 235 +++++++++++++++++++++++++++
> 5 files changed, 243 insertions(+)
> create mode 100644 drivers/net/virtio/virtio_rxtx_simple_neon.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index a59191e..ab04cee 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -143,6 +143,7 @@ F: lib/librte_acl/acl_run_neon.*
> F: lib/librte_lpm/rte_lpm_neon.h
> F: lib/librte_hash/rte*_arm64.h
> F: drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
> +F: drivers/net/virtio/virtio_rxtx_simple_neon.c
>
> EZchip TILE-Gx
> M: Zhigang Lu <zlu@ezchip.com>
> diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
> index 9e2a817..3a5add5 100644
> --- a/doc/guides/rel_notes/release_16_07.rst
> +++ b/doc/guides/rel_notes/release_16_07.rst
This series basically looks good to me, but I don't think we can make it
for v16.07: you missed v1 deadline; it's also too late: rc1 was already out.
--yliu
> @@ -174,6 +174,8 @@ New Features
> section of the "Network Interface Controller Drivers" document.
>
>
> +* **Virtio NEON support for ARM.**
> +
> Resolved Issues
> ---------------
>
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup
2016-07-04 7:36 ` Yuanhan Liu
@ 2016-07-04 8:36 ` Jerin Jacob
2016-07-04 8:42 ` Yuanhan Liu
0 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-07-04 8:36 UTC (permalink / raw)
To: Yuanhan Liu
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Mon, Jul 04, 2016 at 03:36:48PM +0800, Yuanhan Liu wrote:
> On Fri, Jul 01, 2016 at 04:46:36PM +0530, Jerin Jacob wrote:
> > @@ -494,9 +486,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> > {
> > uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
> >
> > -#ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > - struct virtio_hw *hw = dev->data->dev_private;
> > -#endif
> > struct virtnet_tx *txvq;
> > struct virtqueue *vq;
> > uint16_t tx_free_thresh;
> > @@ -511,13 +500,14 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> > }
> >
> > #ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > + struct virtio_hw *hw = dev->data->dev_private;
>
> I'd suggest to move above declaration to ...
>
> > /* Use simple rx/tx func if single segment and no offloads */
> > if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
> > !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
>
> here: we should try to avoid declaring vars in the middle of a code block.
Next patch in this series, moving all rxtx handler selection code to
separate function(virtio_update_rxtx_handler()) where declaration comes
as first line in the function.i.e the comment is taken care of in the
series.
>
> --yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/3] virtio: move SSE based Rx implementation to separate file
2016-07-04 7:42 ` Yuanhan Liu
@ 2016-07-04 8:38 ` Jerin Jacob
0 siblings, 0 replies; 44+ messages in thread
From: Jerin Jacob @ 2016-07-04 8:38 UTC (permalink / raw)
To: Yuanhan Liu
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Mon, Jul 04, 2016 at 03:42:47PM +0800, Yuanhan Liu wrote:
> On Fri, Jul 01, 2016 at 04:46:37PM +0530, Jerin Jacob wrote:
> > * Introduced cpuflag based run-time detection to
> > select the SSE based simple Rx handler
> > * Split out SSE instruction based virtio simple Rx
> > implementation to a separate file
>
> As your commit log says, it does two things, therefore, I'd suggest you
> to do it in two patches, with each just does one thing as you mentioned.
OK. Will fix it in next revision.
>
> > +static void
> > +virtio_update_rxtx_handler(struct rte_eth_dev *dev,
> > + const struct rte_eth_txconf *tx_conf)
> > +{
> > + uint8_t use_simple_rxtx = 0;
> > + struct virtio_hw *hw = dev->data->dev_private;
> > +
> > +#if defined RTE_ARCH_X86
> > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3))
> > + use_simple_rxtx = 1;
> > +#endif
> > + /* Use simple rx/tx func if single segment and no offloads */
> > + if (use_simple_rxtx &&
> > + (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
> > + !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
>
> The alignment here is not consistent, something like following is what
> I'd suggest:
>
> if (use_simple_rxtx &&
> (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
> !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
OK. Will fix it in next revision.
>
> --yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup
2016-07-04 8:36 ` Jerin Jacob
@ 2016-07-04 8:42 ` Yuanhan Liu
2016-07-04 9:07 ` Jerin Jacob
0 siblings, 1 reply; 44+ messages in thread
From: Yuanhan Liu @ 2016-07-04 8:42 UTC (permalink / raw)
To: Jerin Jacob
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Mon, Jul 04, 2016 at 02:06:27PM +0530, Jerin Jacob wrote:
> On Mon, Jul 04, 2016 at 03:36:48PM +0800, Yuanhan Liu wrote:
> > On Fri, Jul 01, 2016 at 04:46:36PM +0530, Jerin Jacob wrote:
> > > @@ -494,9 +486,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> > > {
> > > uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
> > >
> > > -#ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > > - struct virtio_hw *hw = dev->data->dev_private;
> > > -#endif
> > > struct virtnet_tx *txvq;
> > > struct virtqueue *vq;
> > > uint16_t tx_free_thresh;
> > > @@ -511,13 +500,14 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> > > }
> > >
> > > #ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > > + struct virtio_hw *hw = dev->data->dev_private;
> >
> > I'd suggest to move above declaration to ...
> >
> > > /* Use simple rx/tx func if single segment and no offloads */
> > > if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
> > > !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
> >
> > here: we should try to avoid declaring vars in the middle of a code block.
>
> Next patch in this series, moving all rxtx handler selection code to
> separate function(virtio_update_rxtx_handler()) where declaration comes
> as first line in the function.i.e the comment is taken care of in the
> series.
Yes, I saw that. But in principle, each patch is atomic: it's not a
good idea/practice to introduce issues in path A and then fix it in
path B.
--yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/3] virtio: add neon support
2016-07-04 7:53 ` Yuanhan Liu
@ 2016-07-04 8:55 ` Jerin Jacob
2016-07-04 9:02 ` Yuanhan Liu
0 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-07-04 8:55 UTC (permalink / raw)
To: Yuanhan Liu
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Mon, Jul 04, 2016 at 03:53:22PM +0800, Yuanhan Liu wrote:
> On Fri, Jul 01, 2016 at 04:46:38PM +0530, Jerin Jacob wrote:
> > Added neon based Rx vector implementation.
> > Selection of the new handler based neon availability at runtime.
> > Updated the release notes and MAINTAINERS file.
> >
> > Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> > ---
> > MAINTAINERS | 1 +
> > doc/guides/rel_notes/release_16_07.rst | 2 +
> > drivers/net/virtio/Makefile | 2 +
> > drivers/net/virtio/virtio_rxtx.c | 3 +
> > drivers/net/virtio/virtio_rxtx_simple_neon.c | 235 +++++++++++++++++++++++++++
> > 5 files changed, 243 insertions(+)
> > create mode 100644 drivers/net/virtio/virtio_rxtx_simple_neon.c
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index a59191e..ab04cee 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -143,6 +143,7 @@ F: lib/librte_acl/acl_run_neon.*
> > F: lib/librte_lpm/rte_lpm_neon.h
> > F: lib/librte_hash/rte*_arm64.h
> > F: drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
> > +F: drivers/net/virtio/virtio_rxtx_simple_neon.c
> >
> > EZchip TILE-Gx
> > M: Zhigang Lu <zlu@ezchip.com>
> > diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
> > index 9e2a817..3a5add5 100644
> > --- a/doc/guides/rel_notes/release_16_07.rst
> > +++ b/doc/guides/rel_notes/release_16_07.rst
>
> This series basically looks good to me, but I don't think we can make it
> for v16.07: you missed v1 deadline; it's also too late: rc1 was already out.
OK. But I thought, Thomas hasn't pulled the changes from dpdk-next-virtio.
Even if didn't make it for v16.07, I would suggest you to consider taking
the changes to dpdk-next-virtio as this change involves file restructuring
(Will have issue with re-basing in future) without having any functional impact.
>
> --yliu
>
> > @@ -174,6 +174,8 @@ New Features
> > section of the "Network Interface Controller Drivers" document.
> >
> >
> > +* **Virtio NEON support for ARM.**
> > +
> > Resolved Issues
> > ---------------
> >
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/3] virtio: add neon support
2016-07-04 8:55 ` Jerin Jacob
@ 2016-07-04 9:02 ` Yuanhan Liu
0 siblings, 0 replies; 44+ messages in thread
From: Yuanhan Liu @ 2016-07-04 9:02 UTC (permalink / raw)
To: Jerin Jacob
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Mon, Jul 04, 2016 at 02:25:55PM +0530, Jerin Jacob wrote:
> On Mon, Jul 04, 2016 at 03:53:22PM +0800, Yuanhan Liu wrote:
> > On Fri, Jul 01, 2016 at 04:46:38PM +0530, Jerin Jacob wrote:
> > > Added neon based Rx vector implementation.
> > > Selection of the new handler based neon availability at runtime.
> > > Updated the release notes and MAINTAINERS file.
> > >
> > > Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> > > ---
> > > MAINTAINERS | 1 +
> > > doc/guides/rel_notes/release_16_07.rst | 2 +
> > > drivers/net/virtio/Makefile | 2 +
> > > drivers/net/virtio/virtio_rxtx.c | 3 +
> > > drivers/net/virtio/virtio_rxtx_simple_neon.c | 235 +++++++++++++++++++++++++++
> > > 5 files changed, 243 insertions(+)
> > > create mode 100644 drivers/net/virtio/virtio_rxtx_simple_neon.c
> > >
> > > diff --git a/MAINTAINERS b/MAINTAINERS
> > > index a59191e..ab04cee 100644
> > > --- a/MAINTAINERS
> > > +++ b/MAINTAINERS
> > > @@ -143,6 +143,7 @@ F: lib/librte_acl/acl_run_neon.*
> > > F: lib/librte_lpm/rte_lpm_neon.h
> > > F: lib/librte_hash/rte*_arm64.h
> > > F: drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
> > > +F: drivers/net/virtio/virtio_rxtx_simple_neon.c
> > >
> > > EZchip TILE-Gx
> > > M: Zhigang Lu <zlu@ezchip.com>
> > > diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
> > > index 9e2a817..3a5add5 100644
> > > --- a/doc/guides/rel_notes/release_16_07.rst
> > > +++ b/doc/guides/rel_notes/release_16_07.rst
> >
> > This series basically looks good to me, but I don't think we can make it
> > for v16.07: you missed v1 deadline; it's also too late: rc1 was already out.
>
> OK. But I thought, Thomas hasn't pulled the changes from dpdk-next-virtio.
>
> Even if didn't make it for v16.07, I would suggest you to consider taking
> the changes to dpdk-next-virtio as this change involves file restructuring
> (Will have issue with re-basing in future) without having any functional impact.
Yes, that's my plan. I will do the merge ASAP, when
- v16.07 is out.
- your patches are ready.
--yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup
2016-07-04 8:42 ` Yuanhan Liu
@ 2016-07-04 9:07 ` Jerin Jacob
2016-07-04 11:02 ` Yuanhan Liu
0 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-07-04 9:07 UTC (permalink / raw)
To: Yuanhan Liu
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Mon, Jul 04, 2016 at 04:42:32PM +0800, Yuanhan Liu wrote:
> On Mon, Jul 04, 2016 at 02:06:27PM +0530, Jerin Jacob wrote:
> > On Mon, Jul 04, 2016 at 03:36:48PM +0800, Yuanhan Liu wrote:
> > > On Fri, Jul 01, 2016 at 04:46:36PM +0530, Jerin Jacob wrote:
> > > > @@ -494,9 +486,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> > > > {
> > > > uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
> > > >
> > > > -#ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > > > - struct virtio_hw *hw = dev->data->dev_private;
> > > > -#endif
> > > > struct virtnet_tx *txvq;
> > > > struct virtqueue *vq;
> > > > uint16_t tx_free_thresh;
> > > > @@ -511,13 +500,14 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> > > > }
> > > >
> > > > #ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > > > + struct virtio_hw *hw = dev->data->dev_private;
> > >
> > > I'd suggest to move above declaration to ...
> > >
> > > > /* Use simple rx/tx func if single segment and no offloads */
> > > > if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
> > > > !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
> > >
> > > here: we should try to avoid declaring vars in the middle of a code block.
> >
> > Next patch in this series, moving all rxtx handler selection code to
> > separate function(virtio_update_rxtx_handler()) where declaration comes
> > as first line in the function.i.e the comment is taken care of in the
> > series.
>
> Yes, I saw that. But in principle, each patch is atomic: it's not a
> good idea/practice to introduce issues in path A and then fix it in
> path B.
In my view it was not an issue as I was removing all possible
conditional compilation flag. If I were to move the declaration to top
then another conditional compilation RTE_MACHINE_CPUFLAG_SSSE3
flag I need to add around declaring the variable.
Hope this justifies the reason. If you are not convinced then let me know,
if will add the change in next revision.
Jerin
>
> --yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup
2016-07-04 9:07 ` Jerin Jacob
@ 2016-07-04 11:02 ` Yuanhan Liu
2016-07-04 12:15 ` Jerin Jacob
0 siblings, 1 reply; 44+ messages in thread
From: Yuanhan Liu @ 2016-07-04 11:02 UTC (permalink / raw)
To: Jerin Jacob
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Mon, Jul 04, 2016 at 02:37:55PM +0530, Jerin Jacob wrote:
> On Mon, Jul 04, 2016 at 04:42:32PM +0800, Yuanhan Liu wrote:
> > On Mon, Jul 04, 2016 at 02:06:27PM +0530, Jerin Jacob wrote:
> > > On Mon, Jul 04, 2016 at 03:36:48PM +0800, Yuanhan Liu wrote:
> > > > On Fri, Jul 01, 2016 at 04:46:36PM +0530, Jerin Jacob wrote:
> > > > > @@ -494,9 +486,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> > > > > {
> > > > > uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
> > > > >
> > > > > -#ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > > > > - struct virtio_hw *hw = dev->data->dev_private;
> > > > > -#endif
> > > > > struct virtnet_tx *txvq;
> > > > > struct virtqueue *vq;
> > > > > uint16_t tx_free_thresh;
> > > > > @@ -511,13 +500,14 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> > > > > }
> > > > >
> > > > > #ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > > > > + struct virtio_hw *hw = dev->data->dev_private;
> > > >
> > > > I'd suggest to move above declaration to ...
> > > >
> > > > > /* Use simple rx/tx func if single segment and no offloads */
> > > > > if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
> > > > > !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
> > > >
> > > > here: we should try to avoid declaring vars in the middle of a code block.
> > >
> > > Next patch in this series, moving all rxtx handler selection code to
> > > separate function(virtio_update_rxtx_handler()) where declaration comes
> > > as first line in the function.i.e the comment is taken care of in the
> > > series.
> >
> > Yes, I saw that. But in principle, each patch is atomic: it's not a
> > good idea/practice to introduce issues in path A and then fix it in
> > path B.
>
> In my view it was not an issue as I was removing all possible
> conditional compilation flag. If I were to move the declaration to top
> then another conditional compilation RTE_MACHINE_CPUFLAG_SSSE3
> flag I need to add around declaring the variable.
Nope, I was suggesting to move it inside the "if" block. So, this
is actually consistent with what you are trying to do. Besides, it
removes an declaration in the middle.
--yliu
> Hope this justifies the reason. If you are not convinced then let me know,
> if will add the change in next revision.
>
> Jerin
>
> >
> > --yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup
2016-07-04 11:02 ` Yuanhan Liu
@ 2016-07-04 12:15 ` Jerin Jacob
2016-07-04 12:26 ` Yuanhan Liu
0 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-07-04 12:15 UTC (permalink / raw)
To: Yuanhan Liu
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Mon, Jul 04, 2016 at 07:02:25PM +0800, Yuanhan Liu wrote:
> On Mon, Jul 04, 2016 at 02:37:55PM +0530, Jerin Jacob wrote:
> > On Mon, Jul 04, 2016 at 04:42:32PM +0800, Yuanhan Liu wrote:
> > > On Mon, Jul 04, 2016 at 02:06:27PM +0530, Jerin Jacob wrote:
> > > > On Mon, Jul 04, 2016 at 03:36:48PM +0800, Yuanhan Liu wrote:
> > > > > On Fri, Jul 01, 2016 at 04:46:36PM +0530, Jerin Jacob wrote:
> > > > > > @@ -494,9 +486,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> > > > > > {
> > > > > > uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
> > > > > >
> > > > > > -#ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > > > > > - struct virtio_hw *hw = dev->data->dev_private;
> > > > > > -#endif
> > > > > > struct virtnet_tx *txvq;
> > > > > > struct virtqueue *vq;
> > > > > > uint16_t tx_free_thresh;
> > > > > > @@ -511,13 +500,14 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> > > > > > }
> > > > > >
> > > > > > #ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > > > > > + struct virtio_hw *hw = dev->data->dev_private;
> > > > >
> > > > > I'd suggest to move above declaration to ...
> > > > >
> > > > > > /* Use simple rx/tx func if single segment and no offloads */
> > > > > > if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
> > > > > > !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
> > > > >
> > > > > here: we should try to avoid declaring vars in the middle of a code block.
> > > >
> > > > Next patch in this series, moving all rxtx handler selection code to
> > > > separate function(virtio_update_rxtx_handler()) where declaration comes
> > > > as first line in the function.i.e the comment is taken care of in the
> > > > series.
> > >
> > > Yes, I saw that. But in principle, each patch is atomic: it's not a
> > > good idea/practice to introduce issues in path A and then fix it in
> > > path B.
> >
> > In my view it was not an issue as I was removing all possible
> > conditional compilation flag. If I were to move the declaration to top
> > then another conditional compilation RTE_MACHINE_CPUFLAG_SSSE3
> > flag I need to add around declaring the variable.
>
> Nope, I was suggesting to move it inside the "if" block. So, this
> is actually consistent with what you are trying to do. Besides, it
> removes an declaration in the middle.
Just to get the clarity on "moving inside the 'if' block"
Are you suggesting to have like below?
#ifdef RTE_MACHINE_CPUFLAG_SSSE3
+ struct virtio_hw *hw;
/* Use simple rx/tx func if single segment and no offloads */
if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) ==
VIRTIO_SIMPLE_FLAGS &&
!vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
PMD_INIT_LOG(INFO, "Using simple rx/tx path");
dev->tx_pkt_burst = virtio_xmit_pkts_simple;
dev->rx_pkt_burst = virtio_recv_pkts_vec;
- use_simple_rxtx = 1;
+ hw = dev->data->dev_private;
+ hw->use_simple_rxtx = 1;
}
#endif
Instead of following scheme in existing patch,
#ifdef RTE_MACHINE_CPUFLAG_SSSE3
+ struct virtio_hw *hw = dev->data->dev_private;
/* Use simple rx/tx func if single segment and no offloads */
if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) ==
VIRTIO_SIMPLE_FLAGS &&
!vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
PMD_INIT_LOG(INFO, "Using simple rx/tx path");
dev->tx_pkt_burst = virtio_xmit_pkts_simple;
dev->rx_pkt_burst = virtio_recv_pkts_vec;
- use_simple_rxtx = 1;
+ hw->use_simple_rxtx = 1;
}
#endif
The former case will have issue as "hw" been used in "if" with vtpci_with_feature.
OR
if you meant just floating "struct virtio_hw *hw" without RTE_MACHINE_CPUFLAG_SSSE3
then it comes error on non x86 as unused "hw" variable.
If you meant something else then let me know?
>
> --yliu
>
> > Hope this justifies the reason. If you are not convinced then let me know,
> > if will add the change in next revision.
> >
> > Jerin
> >
> > >
> > > --yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup
2016-07-04 12:15 ` Jerin Jacob
@ 2016-07-04 12:26 ` Yuanhan Liu
2016-07-04 12:50 ` Jerin Jacob
0 siblings, 1 reply; 44+ messages in thread
From: Yuanhan Liu @ 2016-07-04 12:26 UTC (permalink / raw)
To: Jerin Jacob
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Mon, Jul 04, 2016 at 05:45:57PM +0530, Jerin Jacob wrote:
> On Mon, Jul 04, 2016 at 07:02:25PM +0800, Yuanhan Liu wrote:
> > On Mon, Jul 04, 2016 at 02:37:55PM +0530, Jerin Jacob wrote:
> > > On Mon, Jul 04, 2016 at 04:42:32PM +0800, Yuanhan Liu wrote:
> > > > On Mon, Jul 04, 2016 at 02:06:27PM +0530, Jerin Jacob wrote:
> > > > > On Mon, Jul 04, 2016 at 03:36:48PM +0800, Yuanhan Liu wrote:
> > > > > > On Fri, Jul 01, 2016 at 04:46:36PM +0530, Jerin Jacob wrote:
> > > > > > > @@ -494,9 +486,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> > > > > > > {
> > > > > > > uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
> > > > > > >
> > > > > > > -#ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > > > > > > - struct virtio_hw *hw = dev->data->dev_private;
> > > > > > > -#endif
> > > > > > > struct virtnet_tx *txvq;
> > > > > > > struct virtqueue *vq;
> > > > > > > uint16_t tx_free_thresh;
> > > > > > > @@ -511,13 +500,14 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> > > > > > > }
> > > > > > >
> > > > > > > #ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > > > > > > + struct virtio_hw *hw = dev->data->dev_private;
> > > > > >
> > > > > > I'd suggest to move above declaration to ...
> > > > > >
> > > > > > > /* Use simple rx/tx func if single segment and no offloads */
> > > > > > > if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
> > > > > > > !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
> > > > > >
> > > > > > here: we should try to avoid declaring vars in the middle of a code block.
> > > > >
> > > > > Next patch in this series, moving all rxtx handler selection code to
> > > > > separate function(virtio_update_rxtx_handler()) where declaration comes
> > > > > as first line in the function.i.e the comment is taken care of in the
> > > > > series.
> > > >
> > > > Yes, I saw that. But in principle, each patch is atomic: it's not a
> > > > good idea/practice to introduce issues in path A and then fix it in
> > > > path B.
> > >
> > > In my view it was not an issue as I was removing all possible
> > > conditional compilation flag. If I were to move the declaration to top
> > > then another conditional compilation RTE_MACHINE_CPUFLAG_SSSE3
> > > flag I need to add around declaring the variable.
> >
> > Nope, I was suggesting to move it inside the "if" block. So, this
> > is actually consistent with what you are trying to do. Besides, it
> > removes an declaration in the middle.
>
> Just to get the clarity on "moving inside the 'if' block"
>
> Are you suggesting to have like below?
>
> #ifdef RTE_MACHINE_CPUFLAG_SSSE3
> + struct virtio_hw *hw;
> /* Use simple rx/tx func if single segment and no offloads */
> if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) ==
> VIRTIO_SIMPLE_FLAGS &&
> !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
> PMD_INIT_LOG(INFO, "Using simple rx/tx path");
> dev->tx_pkt_burst = virtio_xmit_pkts_simple;
> dev->rx_pkt_burst = virtio_recv_pkts_vec;
> - use_simple_rxtx = 1;
> + hw = dev->data->dev_private;
> + hw->use_simple_rxtx = 1;
> }
> #endif
>
>
> Instead of following scheme in existing patch,
>
> #ifdef RTE_MACHINE_CPUFLAG_SSSE3
> + struct virtio_hw *hw = dev->data->dev_private;
> /* Use simple rx/tx func if single segment and no offloads */
> if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) ==
> VIRTIO_SIMPLE_FLAGS &&
> !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
> PMD_INIT_LOG(INFO, "Using simple rx/tx path");
> dev->tx_pkt_burst = virtio_xmit_pkts_simple;
> dev->rx_pkt_burst = virtio_recv_pkts_vec;
> - use_simple_rxtx = 1;
> + hw->use_simple_rxtx = 1;
> }
> #endif
>
>
> The former case will have issue as "hw" been used in "if" with vtpci_with_feature.
Oh, my bad. I overlooked it. Sorry for that!
> OR
>
> if you meant just floating "struct virtio_hw *hw" without RTE_MACHINE_CPUFLAG_SSSE3
> then it comes error on non x86 as unused "hw" variable.
>
> If you meant something else then let me know?
I then prefer to keep the "#ifdef .. #endif" on top then. It will stop
us from offending a minor rule, while you can remove the ugly "#ifdef"
block in the next patch.
Works to you?
--yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup
2016-07-04 12:26 ` Yuanhan Liu
@ 2016-07-04 12:50 ` Jerin Jacob
2016-07-04 12:57 ` Yuanhan Liu
0 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-07-04 12:50 UTC (permalink / raw)
To: Yuanhan Liu
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Mon, Jul 04, 2016 at 08:26:30PM +0800, Yuanhan Liu wrote:
> On Mon, Jul 04, 2016 at 05:45:57PM +0530, Jerin Jacob wrote:
> > On Mon, Jul 04, 2016 at 07:02:25PM +0800, Yuanhan Liu wrote:
> > > On Mon, Jul 04, 2016 at 02:37:55PM +0530, Jerin Jacob wrote:
> > > > On Mon, Jul 04, 2016 at 04:42:32PM +0800, Yuanhan Liu wrote:
> > > > > On Mon, Jul 04, 2016 at 02:06:27PM +0530, Jerin Jacob wrote:
> > > > > > On Mon, Jul 04, 2016 at 03:36:48PM +0800, Yuanhan Liu wrote:
> > > > > > > On Fri, Jul 01, 2016 at 04:46:36PM +0530, Jerin Jacob wrote:
> > > > > > > > @@ -494,9 +486,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> > > > > > > > {
> > > > > > > > uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
> > > > > > > >
> > > > > > > > -#ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > > > > > > > - struct virtio_hw *hw = dev->data->dev_private;
> > > > > > > > -#endif
> > > > > > > > struct virtnet_tx *txvq;
> > > > > > > > struct virtqueue *vq;
> > > > > > > > uint16_t tx_free_thresh;
> > > > > > > > @@ -511,13 +500,14 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
> > > > > > > > }
> > > > > > > >
> > > > > > > > #ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > > > > > > > + struct virtio_hw *hw = dev->data->dev_private;
> > > > > > >
> > > > > > > I'd suggest to move above declaration to ...
> > > > > > >
> > > > > > > > /* Use simple rx/tx func if single segment and no offloads */
> > > > > > > > if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
> > > > > > > > !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
> > > > > > >
> > > > > > > here: we should try to avoid declaring vars in the middle of a code block.
> > > > > >
> > > > > > Next patch in this series, moving all rxtx handler selection code to
> > > > > > separate function(virtio_update_rxtx_handler()) where declaration comes
> > > > > > as first line in the function.i.e the comment is taken care of in the
> > > > > > series.
> > > > >
> > > > > Yes, I saw that. But in principle, each patch is atomic: it's not a
> > > > > good idea/practice to introduce issues in path A and then fix it in
> > > > > path B.
> > > >
> > > > In my view it was not an issue as I was removing all possible
> > > > conditional compilation flag. If I were to move the declaration to top
> > > > then another conditional compilation RTE_MACHINE_CPUFLAG_SSSE3
> > > > flag I need to add around declaring the variable.
> > >
> > > Nope, I was suggesting to move it inside the "if" block. So, this
> > > is actually consistent with what you are trying to do. Besides, it
> > > removes an declaration in the middle.
> >
> > Just to get the clarity on "moving inside the 'if' block"
> >
> > Are you suggesting to have like below?
> >
> > #ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > + struct virtio_hw *hw;
> > /* Use simple rx/tx func if single segment and no offloads */
> > if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) ==
> > VIRTIO_SIMPLE_FLAGS &&
> > !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
> > PMD_INIT_LOG(INFO, "Using simple rx/tx path");
> > dev->tx_pkt_burst = virtio_xmit_pkts_simple;
> > dev->rx_pkt_burst = virtio_recv_pkts_vec;
> > - use_simple_rxtx = 1;
> > + hw = dev->data->dev_private;
> > + hw->use_simple_rxtx = 1;
> > }
> > #endif
> >
> >
> > Instead of following scheme in existing patch,
> >
> > #ifdef RTE_MACHINE_CPUFLAG_SSSE3
> > + struct virtio_hw *hw = dev->data->dev_private;
> > /* Use simple rx/tx func if single segment and no offloads */
> > if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) ==
> > VIRTIO_SIMPLE_FLAGS &&
> > !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
> > PMD_INIT_LOG(INFO, "Using simple rx/tx path");
> > dev->tx_pkt_burst = virtio_xmit_pkts_simple;
> > dev->rx_pkt_burst = virtio_recv_pkts_vec;
> > - use_simple_rxtx = 1;
> > + hw->use_simple_rxtx = 1;
> > }
> > #endif
> >
> >
> > The former case will have issue as "hw" been used in "if" with vtpci_with_feature.
>
> Oh, my bad. I overlooked it. Sorry for that!
>
> > OR
> >
> > if you meant just floating "struct virtio_hw *hw" without RTE_MACHINE_CPUFLAG_SSSE3
> > then it comes error on non x86 as unused "hw" variable.
> >
> > If you meant something else then let me know?
>
> I then prefer to keep the "#ifdef .. #endif" on top then. It will stop
> us from offending a minor rule, while you can remove the ugly "#ifdef"
> block in the next patch.
>
> Works to you?
OK. As you wish :-)
>
> --yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup
2016-07-04 12:50 ` Jerin Jacob
@ 2016-07-04 12:57 ` Yuanhan Liu
0 siblings, 0 replies; 44+ messages in thread
From: Yuanhan Liu @ 2016-07-04 12:57 UTC (permalink / raw)
To: Jerin Jacob
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Mon, Jul 04, 2016 at 06:20:42PM +0530, Jerin Jacob wrote:
> > > The former case will have issue as "hw" been used in "if" with vtpci_with_feature.
> >
> > Oh, my bad. I overlooked it. Sorry for that!
> >
> > > OR
> > >
> > > if you meant just floating "struct virtio_hw *hw" without RTE_MACHINE_CPUFLAG_SSSE3
> > > then it comes error on non x86 as unused "hw" variable.
> > >
> > > If you meant something else then let me know?
> >
> > I then prefer to keep the "#ifdef .. #endif" on top then. It will stop
> > us from offending a minor rule, while you can remove the ugly "#ifdef"
> > block in the next patch.
> >
> > Works to you?
>
> OK. As you wish :-)
Thank you!
--yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* [dpdk-dev] [PATCH v3 0/4] Virtio NEON support for ARM
2016-07-01 11:16 ` [dpdk-dev] From: Jerin Jacob <jerin.jacob@caviumnetworks.com> Jerin Jacob
` (2 preceding siblings ...)
2016-07-01 11:16 ` [dpdk-dev] [PATCH v2 3/3] virtio: add neon support Jerin Jacob
@ 2016-07-05 12:49 ` Jerin Jacob
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 1/4] virtio: conditional compilation cleanup Jerin Jacob
` (4 more replies)
3 siblings, 5 replies; 44+ messages in thread
From: Jerin Jacob @ 2016-07-05 12:49 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie,
yuanhan.liu, Jerin Jacob
This patch-set includes,
1) General cleanup of compile time dependency.
2) made vector handler section based on run-time cpuflags
2) Added NEON support for optimized Rx handling
This patch-set is based on dpdk-next-virtio/master
v3:
Address Yuanhan's review comments
http://dpdk.org/dev/patchwork/patch/14495/
http://dpdk.org/dev/patchwork/patch/14496/
v2:
- made vector handler selection based on run-time cpuflags (Suggested by Thomas)
- moved vector implementations to .c file instead of .h file(Suggested by Jianbo)
Jerin Jacob (4):
virtio: conditional compilation cleanup
virtio: move SSE based Rx implementation to separate file
virtio: add cpuflag based vector handler selection
virtio: add neon support
MAINTAINERS | 1 +
doc/guides/rel_notes/release_16_07.rst | 2 +
drivers/net/virtio/Makefile | 7 +-
drivers/net/virtio/virtio_pci.h | 1 +
drivers/net/virtio/virtio_rxtx.c | 63 ++++---
drivers/net/virtio/virtio_rxtx.h | 3 +-
drivers/net/virtio/virtio_rxtx_simple.c | 269 ++-------------------------
drivers/net/virtio/virtio_rxtx_simple.h | 133 +++++++++++++
drivers/net/virtio/virtio_rxtx_simple_neon.c | 235 +++++++++++++++++++++++
drivers/net/virtio/virtio_rxtx_simple_sse.c | 222 ++++++++++++++++++++++
drivers/net/virtio/virtio_user_ethdev.c | 1 +
11 files changed, 646 insertions(+), 291 deletions(-)
create mode 100644 drivers/net/virtio/virtio_rxtx_simple.h
create mode 100644 drivers/net/virtio/virtio_rxtx_simple_neon.c
create mode 100644 drivers/net/virtio/virtio_rxtx_simple_sse.c
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* [dpdk-dev] [PATCH v3 1/4] virtio: conditional compilation cleanup
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 0/4] Virtio NEON support for ARM Jerin Jacob
@ 2016-07-05 12:49 ` Jerin Jacob
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 2/4] virtio: move SSE based Rx implementation to separate file Jerin Jacob
` (3 subsequent siblings)
4 siblings, 0 replies; 44+ messages in thread
From: Jerin Jacob @ 2016-07-05 12:49 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie,
yuanhan.liu, Jerin Jacob
Removed unnecessary compile time dependency on "use_simple_rxtx".
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/virtio/Makefile | 3 ---
drivers/net/virtio/virtio_pci.h | 1 +
drivers/net/virtio/virtio_rxtx.c | 24 ++++++++----------------
drivers/net/virtio/virtio_rxtx.h | 3 +--
drivers/net/virtio/virtio_rxtx_simple.c | 8 ++++++--
drivers/net/virtio/virtio_user_ethdev.c | 1 +
6 files changed, 17 insertions(+), 23 deletions(-)
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 3020b68..b9b0d8d 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -50,10 +50,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
-
-ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSSE3,$(CFLAGS)),RTE_MACHINE_CPUFLAG_SSSE3)
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
-endif
ifeq ($(CONFIG_RTE_VIRTIO_USER),y)
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index dd7693f..b8295a7 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -253,6 +253,7 @@ struct virtio_hw {
uint8_t use_msix;
uint8_t started;
uint8_t modern;
+ uint8_t use_simple_rxtx;
uint8_t mac_addr[ETHER_ADDR_LEN];
uint32_t notify_off_multiplier;
uint8_t *isr;
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index a27208e..e707954 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -67,10 +67,6 @@
#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
ETH_TXQ_FLAGS_NOOFFLOADS)
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
-static int use_simple_rxtx;
-#endif
-
static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
@@ -333,6 +329,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
*/
uint16_t i;
uint16_t desc_idx;
+ struct virtio_hw *hw = dev->data->dev_private;
PMD_INIT_FUNC_TRACE();
@@ -353,8 +350,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
nbufs = 0;
error = ENOSPC;
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- if (use_simple_rxtx) {
+ if (hw->use_simple_rxtx) {
for (desc_idx = 0; desc_idx < vq->vq_nentries;
desc_idx++) {
vq->vq_ring.avail->ring[desc_idx] = desc_idx;
@@ -362,7 +358,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
VRING_DESC_F_WRITE;
}
}
-#endif
+
memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
desc_idx++) {
@@ -378,12 +374,11 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
/******************************************
* Enqueue allocated buffers *
*******************************************/
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- if (use_simple_rxtx)
+ if (hw->use_simple_rxtx)
error = virtqueue_enqueue_recv_refill_simple(vq, m);
else
-#endif
error = virtqueue_enqueue_recv_refill(vq, m);
+
if (error) {
rte_pktmbuf_free(m);
break;
@@ -404,8 +399,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
struct virtqueue *vq = txvq->vq;
virtio_dev_vring_start(vq);
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- if (use_simple_rxtx) {
+ if (hw->use_simple_rxtx) {
uint16_t mid_idx = vq->vq_nentries >> 1;
for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
@@ -426,7 +420,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
desc_idx++)
vq->vq_ring.avail->ring[desc_idx] = desc_idx;
}
-#endif
+
VIRTQUEUE_DUMP(vq);
}
}
@@ -456,9 +450,7 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
dev->data->rx_queues[queue_idx] = rxvq;
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
virtio_rxq_vec_setup(rxvq);
-#endif
return 0;
}
@@ -517,7 +509,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
PMD_INIT_LOG(INFO, "Using simple rx/tx path");
dev->tx_pkt_burst = virtio_xmit_pkts_simple;
dev->rx_pkt_burst = virtio_recv_pkts_vec;
- use_simple_rxtx = 1;
+ hw->use_simple_rxtx = 1;
}
#endif
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 058b56a..28f82d6 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -86,10 +86,9 @@ struct virtnet_ctl {
const struct rte_memzone *mz; /**< mem zone to populate RX ring. */
};
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct rte_mbuf *m);
-#endif
+
#endif /* _VIRTIO_RXTX_H_ */
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index 242ad90..67430da 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -37,8 +37,6 @@
#include <string.h>
#include <errno.h>
-#include <tmmintrin.h>
-
#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memzone.h>
@@ -131,6 +129,10 @@ virtio_rxq_rearm_vec(struct virtnet_rx *rxvq)
vq_update_avail_idx(vq);
}
+#ifdef RTE_MACHINE_CPUFLAG_SSSE3
+
+#include <tmmintrin.h>
+
/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
*
* This routine is for non-mergeable RX, one desc for each guest buffer.
@@ -293,6 +295,8 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
return nb_pkts_received;
}
+#endif
+
#define VIRTIO_TX_FREE_THRESH 32
#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
#define VIRTIO_TX_FREE_NR 32
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 3d3c9da..3185a4c 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -303,6 +303,7 @@ virtio_user_eth_dev_alloc(const char *name)
hw->vtpci_ops = &virtio_user_ops;
hw->use_msix = 0;
hw->modern = 0;
+ hw->use_simple_rxtx = 0;
hw->virtio_user_dev = dev;
data->dev_private = hw;
data->numa_node = SOCKET_ID_ANY;
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* [dpdk-dev] [PATCH v3 2/4] virtio: move SSE based Rx implementation to separate file
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 0/4] Virtio NEON support for ARM Jerin Jacob
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 1/4] virtio: conditional compilation cleanup Jerin Jacob
@ 2016-07-05 12:49 ` Jerin Jacob
2016-08-18 6:52 ` Yuanhan Liu
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 3/4] virtio: add cpuflag based vector handler selection Jerin Jacob
` (2 subsequent siblings)
4 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-07-05 12:49 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie,
yuanhan.liu, Jerin Jacob
Split out SSE instruction based virtio simple Rx
implementation to a separate file
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/virtio/Makefile | 4 +
drivers/net/virtio/virtio_rxtx_simple.c | 273 ++--------------------------
drivers/net/virtio/virtio_rxtx_simple.h | 133 ++++++++++++++
drivers/net/virtio/virtio_rxtx_simple_sse.c | 222 ++++++++++++++++++++++
4 files changed, 370 insertions(+), 262 deletions(-)
create mode 100644 drivers/net/virtio/virtio_rxtx_simple.h
create mode 100644 drivers/net/virtio/virtio_rxtx_simple_sse.c
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index b9b0d8d..c4103b7 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -52,6 +52,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
+ifeq ($(CONFIG_RTE_ARCH_X86),y)
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple_sse.c
+endif
+
ifeq ($(CONFIG_RTE_VIRTIO_USER),y)
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index 67430da..485ddce 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -51,14 +51,7 @@
#include <rte_errno.h>
#include <rte_byteorder.h>
-#include "virtio_logs.h"
-#include "virtio_ethdev.h"
-#include "virtqueue.h"
-#include "virtio_rxtx.h"
-
-#define RTE_VIRTIO_VPMD_RX_BURST 32
-#define RTE_VIRTIO_DESC_PER_LOOP 8
-#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+#include "virtio_rxtx_simple.h"
#ifndef __INTEL_COMPILER
#pragma GCC diagnostic ignored "-Wcast-qual"
@@ -89,260 +82,6 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
return 0;
}
-static inline void
-virtio_rxq_rearm_vec(struct virtnet_rx *rxvq)
-{
- int i;
- uint16_t desc_idx;
- struct rte_mbuf **sw_ring;
- struct vring_desc *start_dp;
- int ret;
- struct virtqueue *vq = rxvq->vq;
-
- desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
- sw_ring = &vq->sw_ring[desc_idx];
- start_dp = &vq->vq_ring.desc[desc_idx];
-
- ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
- RTE_VIRTIO_VPMD_RX_REARM_THRESH);
- if (unlikely(ret)) {
- rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed +=
- RTE_VIRTIO_VPMD_RX_REARM_THRESH;
- return;
- }
-
- for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) {
- uintptr_t p;
-
- p = (uintptr_t)&sw_ring[i]->rearm_data;
- *(uint64_t *)p = rxvq->mbuf_initializer;
-
- start_dp[i].addr =
- MBUF_DATA_DMA_ADDR(sw_ring[i], vq->offset) -
- vq->hw->vtnet_hdr_size;
- start_dp[i].len = sw_ring[i]->buf_len -
- RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size;
- }
-
- vq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
- vq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
- vq_update_avail_idx(vq);
-}
-
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
-
-#include <tmmintrin.h>
-
-/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
- *
- * This routine is for non-mergeable RX, one desc for each guest buffer.
- * This routine is based on the RX ring layout optimization. Each entry in the
- * avail ring points to the desc with the same index in the desc ring and this
- * will never be changed in the driver.
- *
- * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
- */
-uint16_t
-virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t nb_pkts)
-{
- struct virtnet_rx *rxvq = rx_queue;
- struct virtqueue *vq = rxvq->vq;
- uint16_t nb_used;
- uint16_t desc_idx;
- struct vring_used_elem *rused;
- struct rte_mbuf **sw_ring;
- struct rte_mbuf **sw_ring_end;
- uint16_t nb_pkts_received;
- __m128i shuf_msk1, shuf_msk2, len_adjust;
-
- shuf_msk1 = _mm_set_epi8(
- 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, /* vlan tci */
- 5, 4, /* dat len */
- 0xFF, 0xFF, 5, 4, /* pkt len */
- 0xFF, 0xFF, 0xFF, 0xFF /* packet type */
-
- );
-
- shuf_msk2 = _mm_set_epi8(
- 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, /* vlan tci */
- 13, 12, /* dat len */
- 0xFF, 0xFF, 13, 12, /* pkt len */
- 0xFF, 0xFF, 0xFF, 0xFF /* packet type */
- );
-
- /* Subtract the header length.
- * In which case do we need the header length in used->len ?
- */
- len_adjust = _mm_set_epi16(
- 0, 0,
- 0,
- (uint16_t)-vq->hw->vtnet_hdr_size,
- 0, (uint16_t)-vq->hw->vtnet_hdr_size,
- 0, 0);
-
- if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
- return 0;
-
- nb_used = VIRTQUEUE_NUSED(vq);
-
- rte_compiler_barrier();
-
- if (unlikely(nb_used == 0))
- return 0;
-
- nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
- nb_used = RTE_MIN(nb_used, nb_pkts);
-
- desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
- rused = &vq->vq_ring.used->ring[desc_idx];
- sw_ring = &vq->sw_ring[desc_idx];
- sw_ring_end = &vq->sw_ring[vq->vq_nentries];
-
- _mm_prefetch((const void *)rused, _MM_HINT_T0);
-
- if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
- virtio_rxq_rearm_vec(rxvq);
- if (unlikely(virtqueue_kick_prepare(vq)))
- virtqueue_notify(vq);
- }
-
- for (nb_pkts_received = 0;
- nb_pkts_received < nb_used;) {
- __m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
- __m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
- __m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
-
- mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0));
- desc[0] = _mm_loadu_si128((__m128i *)(rused + 0));
- _mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]);
-
- mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2));
- desc[1] = _mm_loadu_si128((__m128i *)(rused + 2));
- _mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]);
-
- mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4));
- desc[2] = _mm_loadu_si128((__m128i *)(rused + 4));
- _mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]);
-
- mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6));
- desc[3] = _mm_loadu_si128((__m128i *)(rused + 6));
- _mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]);
-
- pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2);
- pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1);
- pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust);
- pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust);
- _mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1,
- pkt_mb[1]);
- _mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1,
- pkt_mb[0]);
-
- pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2);
- pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1);
- pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust);
- pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust);
- _mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1,
- pkt_mb[3]);
- _mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1,
- pkt_mb[2]);
-
- pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2);
- pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1);
- pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust);
- pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust);
- _mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1,
- pkt_mb[5]);
- _mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1,
- pkt_mb[4]);
-
- pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2);
- pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1);
- pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust);
- pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust);
- _mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1,
- pkt_mb[7]);
- _mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1,
- pkt_mb[6]);
-
- if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
- if (sw_ring + nb_used <= sw_ring_end)
- nb_pkts_received += nb_used;
- else
- nb_pkts_received += sw_ring_end - sw_ring;
- break;
- } else {
- if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
- sw_ring_end)) {
- nb_pkts_received += sw_ring_end - sw_ring;
- break;
- } else {
- nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
-
- rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
- sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
- rused += RTE_VIRTIO_DESC_PER_LOOP;
- nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
- }
- }
- }
-
- vq->vq_used_cons_idx += nb_pkts_received;
- vq->vq_free_cnt += nb_pkts_received;
- rxvq->stats.packets += nb_pkts_received;
- return nb_pkts_received;
-}
-
-#endif
-
-#define VIRTIO_TX_FREE_THRESH 32
-#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
-#define VIRTIO_TX_FREE_NR 32
-/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift */
-static inline void
-virtio_xmit_cleanup(struct virtqueue *vq)
-{
- uint16_t i, desc_idx;
- int nb_free = 0;
- struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];
-
- desc_idx = (uint16_t)(vq->vq_used_cons_idx &
- ((vq->vq_nentries >> 1) - 1));
- m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
- m = __rte_pktmbuf_prefree_seg(m);
- if (likely(m != NULL)) {
- free[0] = m;
- nb_free = 1;
- for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
- m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
- m = __rte_pktmbuf_prefree_seg(m);
- if (likely(m != NULL)) {
- if (likely(m->pool == free[0]->pool))
- free[nb_free++] = m;
- else {
- rte_mempool_put_bulk(free[0]->pool,
- (void **)free, nb_free);
- free[0] = m;
- nb_free = 1;
- }
- }
- }
- rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
- } else {
- for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
- m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
- m = __rte_pktmbuf_prefree_seg(m);
- if (m != NULL)
- rte_mempool_put(m->pool, m);
- }
- }
-
- vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
- vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
-}
-
uint16_t
virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
@@ -423,3 +162,13 @@ virtio_rxq_vec_setup(struct virtnet_rx *rxq)
return 0;
}
+
+/* Stub for linkage when arch specific implementation is not available */
+uint16_t __attribute__((weak))
+virtio_recv_pkts_vec(void *rx_queue __rte_unused,
+ struct rte_mbuf **rx_pkts __rte_unused,
+ uint16_t nb_pkts __rte_unused)
+{
+ rte_panic("Wrong weak function linked by linker\n");
+ return 0;
+}
diff --git a/drivers/net/virtio/virtio_rxtx_simple.h b/drivers/net/virtio/virtio_rxtx_simple.h
new file mode 100644
index 0000000..8cb43c0
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple.h
@@ -0,0 +1,133 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_RXTX_SIMPLE_H_
+#define _VIRTIO_RXTX_SIMPLE_H_
+
+#include <stdint.h>
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+#include "virtio_rxtx.h"
+
+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
+static inline void
+virtio_rxq_rearm_vec(struct virtnet_rx *rxvq)
+{
+ int i;
+ uint16_t desc_idx;
+ struct rte_mbuf **sw_ring;
+ struct vring_desc *start_dp;
+ int ret;
+ struct virtqueue *vq = rxvq->vq;
+
+ desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
+ sw_ring = &vq->sw_ring[desc_idx];
+ start_dp = &vq->vq_ring.desc[desc_idx];
+
+ ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
+ RTE_VIRTIO_VPMD_RX_REARM_THRESH);
+ if (unlikely(ret)) {
+ rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed +=
+ RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+ return;
+ }
+
+ for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) {
+ uintptr_t p;
+
+ p = (uintptr_t)&sw_ring[i]->rearm_data;
+ *(uint64_t *)p = rxvq->mbuf_initializer;
+
+ start_dp[i].addr =
+ MBUF_DATA_DMA_ADDR(sw_ring[i], vq->offset) -
+ vq->hw->vtnet_hdr_size;
+ start_dp[i].len = sw_ring[i]->buf_len -
+ RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size;
+ }
+
+ vq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+ vq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+ vq_update_avail_idx(vq);
+}
+
+#define VIRTIO_TX_FREE_THRESH 32
+#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
+#define VIRTIO_TX_FREE_NR 32
+/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift */
+static inline void
+virtio_xmit_cleanup(struct virtqueue *vq)
+{
+ uint16_t i, desc_idx;
+ int nb_free = 0;
+ struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];
+
+ desc_idx = (uint16_t)(vq->vq_used_cons_idx &
+ ((vq->vq_nentries >> 1) - 1));
+ m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+ m = __rte_pktmbuf_prefree_seg(m);
+ if (likely(m != NULL)) {
+ free[0] = m;
+ nb_free = 1;
+ for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
+ m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+ m = __rte_pktmbuf_prefree_seg(m);
+ if (likely(m != NULL)) {
+ if (likely(m->pool == free[0]->pool))
+ free[nb_free++] = m;
+ else {
+ rte_mempool_put_bulk(free[0]->pool,
+ (void **)free, nb_free);
+ free[0] = m;
+ nb_free = 1;
+ }
+ }
+ }
+ rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+ } else {
+ for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
+ m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+ m = __rte_pktmbuf_prefree_seg(m);
+ if (m != NULL)
+ rte_mempool_put(m->pool, m);
+ }
+ }
+
+ vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
+ vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
+}
+
+#endif /* _VIRTIO_RXTX_SIMPLE_H_ */
diff --git a/drivers/net/virtio/virtio_rxtx_simple_sse.c b/drivers/net/virtio/virtio_rxtx_simple_sse.c
new file mode 100644
index 0000000..39000e8
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple_sse.c
@@ -0,0 +1,222 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <tmmintrin.h>
+
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_cycles.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_prefetch.h>
+#include <rte_string_fns.h>
+
+#include "virtio_rxtx_simple.h"
+
+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
+/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
+ *
+ * This routine is for non-mergeable RX, one desc for each guest buffer.
+ * This routine is based on the RX ring layout optimization. Each entry in the
+ * avail ring points to the desc with the same index in the desc ring and this
+ * will never be changed in the driver.
+ *
+ * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
+ */
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct virtnet_rx *rxvq = rx_queue;
+ struct virtqueue *vq = rxvq->vq;
+ uint16_t nb_used;
+ uint16_t desc_idx;
+ struct vring_used_elem *rused;
+ struct rte_mbuf **sw_ring;
+ struct rte_mbuf **sw_ring_end;
+ uint16_t nb_pkts_received;
+ __m128i shuf_msk1, shuf_msk2, len_adjust;
+
+ shuf_msk1 = _mm_set_epi8(
+ 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, /* vlan tci */
+ 5, 4, /* dat len */
+ 0xFF, 0xFF, 5, 4, /* pkt len */
+ 0xFF, 0xFF, 0xFF, 0xFF /* packet type */
+
+ );
+
+ shuf_msk2 = _mm_set_epi8(
+ 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, /* vlan tci */
+ 13, 12, /* dat len */
+ 0xFF, 0xFF, 13, 12, /* pkt len */
+ 0xFF, 0xFF, 0xFF, 0xFF /* packet type */
+ );
+
+ /* Subtract the header length.
+ * In which case do we need the header length in used->len ?
+ */
+ len_adjust = _mm_set_epi16(
+ 0, 0,
+ 0,
+ (uint16_t)-vq->hw->vtnet_hdr_size,
+ 0, (uint16_t)-vq->hw->vtnet_hdr_size,
+ 0, 0);
+
+ if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
+ return 0;
+
+ nb_used = VIRTQUEUE_NUSED(vq);
+
+ rte_compiler_barrier();
+
+ if (unlikely(nb_used == 0))
+ return 0;
+
+ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
+ nb_used = RTE_MIN(nb_used, nb_pkts);
+
+ desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+ rused = &vq->vq_ring.used->ring[desc_idx];
+ sw_ring = &vq->sw_ring[desc_idx];
+ sw_ring_end = &vq->sw_ring[vq->vq_nentries];
+
+ _mm_prefetch((const void *)rused, _MM_HINT_T0);
+
+ if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
+ virtio_rxq_rearm_vec(rxvq);
+ if (unlikely(virtqueue_kick_prepare(vq)))
+ virtqueue_notify(vq);
+ }
+
+ for (nb_pkts_received = 0;
+ nb_pkts_received < nb_used;) {
+ __m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
+ __m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
+ __m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
+
+ mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0));
+ desc[0] = _mm_loadu_si128((__m128i *)(rused + 0));
+ _mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]);
+
+ mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2));
+ desc[1] = _mm_loadu_si128((__m128i *)(rused + 2));
+ _mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]);
+
+ mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4));
+ desc[2] = _mm_loadu_si128((__m128i *)(rused + 4));
+ _mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]);
+
+ mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6));
+ desc[3] = _mm_loadu_si128((__m128i *)(rused + 6));
+ _mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]);
+
+ pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2);
+ pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1);
+ pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust);
+ pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust);
+ _mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1,
+ pkt_mb[1]);
+ _mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1,
+ pkt_mb[0]);
+
+ pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2);
+ pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1);
+ pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust);
+ pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust);
+ _mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1,
+ pkt_mb[3]);
+ _mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1,
+ pkt_mb[2]);
+
+ pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2);
+ pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1);
+ pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust);
+ pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust);
+ _mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1,
+ pkt_mb[5]);
+ _mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1,
+ pkt_mb[4]);
+
+ pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2);
+ pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1);
+ pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust);
+ pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust);
+ _mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1,
+ pkt_mb[7]);
+ _mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1,
+ pkt_mb[6]);
+
+ if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
+ if (sw_ring + nb_used <= sw_ring_end)
+ nb_pkts_received += nb_used;
+ else
+ nb_pkts_received += sw_ring_end - sw_ring;
+ break;
+ } else {
+ if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
+ sw_ring_end)) {
+ nb_pkts_received += sw_ring_end - sw_ring;
+ break;
+ } else {
+ nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
+
+ rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
+ sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
+ rused += RTE_VIRTIO_DESC_PER_LOOP;
+ nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
+ }
+ }
+ }
+
+ vq->vq_used_cons_idx += nb_pkts_received;
+ vq->vq_free_cnt += nb_pkts_received;
+ rxvq->stats.packets += nb_pkts_received;
+ return nb_pkts_received;
+}
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* [dpdk-dev] [PATCH v3 3/4] virtio: add cpuflag based vector handler selection
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 0/4] Virtio NEON support for ARM Jerin Jacob
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 1/4] virtio: conditional compilation cleanup Jerin Jacob
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 2/4] virtio: move SSE based Rx implementation to separate file Jerin Jacob
@ 2016-07-05 12:49 ` Jerin Jacob
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 4/4] virtio: add neon support Jerin Jacob
2016-09-28 0:37 ` [dpdk-dev] [PATCH v3 0/4] Virtio NEON support for ARM Yuanhan Liu
4 siblings, 0 replies; 44+ messages in thread
From: Jerin Jacob @ 2016-07-05 12:49 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie,
yuanhan.liu, Jerin Jacob
Introduced cpuflag based run-time detection to select the
SSE based simple Rx handler
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/virtio/virtio_rxtx.c | 38 ++++++++++++++++++++++++--------------
1 file changed, 24 insertions(+), 14 deletions(-)
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index e707954..adc3457 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -50,6 +50,7 @@
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
+#include <rte_cpuflags.h>
#include "virtio_logs.h"
#include "virtio_ethdev.h"
@@ -470,6 +471,28 @@ virtio_dev_rx_queue_release(void *rxq)
rte_memzone_free(mz);
}
+static void
+virtio_update_rxtx_handler(struct rte_eth_dev *dev,
+ const struct rte_eth_txconf *tx_conf)
+{
+ uint8_t use_simple_rxtx = 0;
+ struct virtio_hw *hw = dev->data->dev_private;
+
+#if defined RTE_ARCH_X86
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3))
+ use_simple_rxtx = 1;
+#endif
+ /* Use simple rx/tx func if single segment and no offloads */
+ if (use_simple_rxtx &&
+ (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
+ !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
+ PMD_INIT_LOG(INFO, "Using simple rx/tx path");
+ dev->tx_pkt_burst = virtio_xmit_pkts_simple;
+ dev->rx_pkt_burst = virtio_recv_pkts_vec;
+ hw->use_simple_rxtx = use_simple_rxtx;
+ }
+}
+
/*
* struct rte_eth_dev *dev: Used to update dev
* uint16_t nb_desc: Defaults to values read from config space
@@ -485,10 +508,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
const struct rte_eth_txconf *tx_conf)
{
uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
-
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- struct virtio_hw *hw = dev->data->dev_private;
-#endif
struct virtnet_tx *txvq;
struct virtqueue *vq;
uint16_t tx_free_thresh;
@@ -502,16 +521,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
return -EINVAL;
}
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- /* Use simple rx/tx func if single segment and no offloads */
- if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
- !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
- PMD_INIT_LOG(INFO, "Using simple rx/tx path");
- dev->tx_pkt_burst = virtio_xmit_pkts_simple;
- dev->rx_pkt_burst = virtio_recv_pkts_vec;
- hw->use_simple_rxtx = 1;
- }
-#endif
+ virtio_update_rxtx_handler(dev, tx_conf);
ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
nb_desc, socket_id, (void **)&txvq);
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* [dpdk-dev] [PATCH v3 4/4] virtio: add neon support
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 0/4] Virtio NEON support for ARM Jerin Jacob
` (2 preceding siblings ...)
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 3/4] virtio: add cpuflag based vector handler selection Jerin Jacob
@ 2016-07-05 12:49 ` Jerin Jacob
2016-07-06 3:11 ` Jianbo Liu
2016-09-28 0:37 ` [dpdk-dev] [PATCH v3 0/4] Virtio NEON support for ARM Yuanhan Liu
4 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-07-05 12:49 UTC (permalink / raw)
To: dev
Cc: thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie,
yuanhan.liu, Jerin Jacob
Added neon based Rx vector implementation.
Selection of the new handler based neon availability at runtime.
Updated the release notes and MAINTAINERS file.
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
MAINTAINERS | 1 +
doc/guides/rel_notes/release_16_07.rst | 2 +
drivers/net/virtio/Makefile | 2 +
drivers/net/virtio/virtio_rxtx.c | 3 +
drivers/net/virtio/virtio_rxtx_simple_neon.c | 235 +++++++++++++++++++++++++++
5 files changed, 243 insertions(+)
create mode 100644 drivers/net/virtio/virtio_rxtx_simple_neon.c
diff --git a/MAINTAINERS b/MAINTAINERS
index a59191e..ab04cee 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -143,6 +143,7 @@ F: lib/librte_acl/acl_run_neon.*
F: lib/librte_lpm/rte_lpm_neon.h
F: lib/librte_hash/rte*_arm64.h
F: drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
+F: drivers/net/virtio/virtio_rxtx_simple_neon.c
EZchip TILE-Gx
M: Zhigang Lu <zlu@ezchip.com>
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 569f562..57f3d28 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -180,6 +180,8 @@ New Features
section of the "Network Interface Controller Drivers" document.
+* **Virtio NEON support for ARM.**
+
Resolved Issues
---------------
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index c4103b7..97972a6 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -54,6 +54,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
ifeq ($(CONFIG_RTE_ARCH_X86),y)
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple_sse.c
+else ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple_neon.c
endif
ifeq ($(CONFIG_RTE_VIRTIO_USER),y)
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index adc3457..8f6cad8 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -481,6 +481,9 @@ virtio_update_rxtx_handler(struct rte_eth_dev *dev,
#if defined RTE_ARCH_X86
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3))
use_simple_rxtx = 1;
+#elif defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
+ use_simple_rxtx = 1;
#endif
/* Use simple rx/tx func if single segment and no offloads */
if (use_simple_rxtx &&
diff --git a/drivers/net/virtio/virtio_rxtx_simple_neon.c b/drivers/net/virtio/virtio_rxtx_simple_neon.c
new file mode 100644
index 0000000..793eefb
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple_neon.c
@@ -0,0 +1,235 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium networks Ltd. 2016
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_cycles.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_prefetch.h>
+#include <rte_string_fns.h>
+#include <rte_vect.h>
+
+#include "virtio_rxtx_simple.h"
+
+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
+/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
+ *
+ * This routine is for non-mergeable RX, one desc for each guest buffer.
+ * This routine is based on the RX ring layout optimization. Each entry in the
+ * avail ring points to the desc with the same index in the desc ring and this
+ * will never be changed in the driver.
+ *
+ * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
+ */
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct virtnet_rx *rxvq = rx_queue;
+ struct virtqueue *vq = rxvq->vq;
+ uint16_t nb_used;
+ uint16_t desc_idx;
+ struct vring_used_elem *rused;
+ struct rte_mbuf **sw_ring;
+ struct rte_mbuf **sw_ring_end;
+ uint16_t nb_pkts_received;
+
+ uint8x16_t shuf_msk1 = {
+ 0xFF, 0xFF, 0xFF, 0xFF, /* packet type */
+ 4, 5, 0xFF, 0xFF, /* pkt len */
+ 4, 5, /* dat len */
+ 0xFF, 0xFF, /* vlan tci */
+ 0xFF, 0xFF, 0xFF, 0xFF
+ };
+
+ uint8x16_t shuf_msk2 = {
+ 0xFF, 0xFF, 0xFF, 0xFF, /* packet type */
+ 12, 13, 0xFF, 0xFF, /* pkt len */
+ 12, 13, /* dat len */
+ 0xFF, 0xFF, /* vlan tci */
+ 0xFF, 0xFF, 0xFF, 0xFF
+ };
+
+ /* Subtract the header length.
+ * In which case do we need the header length in used->len ?
+ */
+ uint16x8_t len_adjust = {
+ 0, 0,
+ (uint16_t)vq->hw->vtnet_hdr_size, 0,
+ (uint16_t)vq->hw->vtnet_hdr_size,
+ 0,
+ 0, 0
+ };
+
+ if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
+ return 0;
+
+ nb_used = VIRTQUEUE_NUSED(vq);
+
+ rte_rmb();
+
+ if (unlikely(nb_used == 0))
+ return 0;
+
+ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
+ nb_used = RTE_MIN(nb_used, nb_pkts);
+
+ desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+ rused = &vq->vq_ring.used->ring[desc_idx];
+ sw_ring = &vq->sw_ring[desc_idx];
+ sw_ring_end = &vq->sw_ring[vq->vq_nentries];
+
+ rte_prefetch_non_temporal(rused);
+
+ if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
+ virtio_rxq_rearm_vec(rxvq);
+ if (unlikely(virtqueue_kick_prepare(vq)))
+ virtqueue_notify(vq);
+ }
+
+ for (nb_pkts_received = 0;
+ nb_pkts_received < nb_used;) {
+ uint64x2_t desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
+ uint64x2_t mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
+ uint64x2_t pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
+
+ mbp[0] = vld1q_u64((uint64_t *)(sw_ring + 0));
+ desc[0] = vld1q_u64((uint64_t *)(rused + 0));
+ vst1q_u64((uint64_t *)&rx_pkts[0], mbp[0]);
+
+ mbp[1] = vld1q_u64((uint64_t *)(sw_ring + 2));
+ desc[1] = vld1q_u64((uint64_t *)(rused + 2));
+ vst1q_u64((uint64_t *)&rx_pkts[2], mbp[1]);
+
+ mbp[2] = vld1q_u64((uint64_t *)(sw_ring + 4));
+ desc[2] = vld1q_u64((uint64_t *)(rused + 4));
+ vst1q_u64((uint64_t *)&rx_pkts[4], mbp[2]);
+
+ mbp[3] = vld1q_u64((uint64_t *)(sw_ring + 6));
+ desc[3] = vld1q_u64((uint64_t *)(rused + 6));
+ vst1q_u64((uint64_t *)&rx_pkts[6], mbp[3]);
+
+ pkt_mb[1] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[0]), shuf_msk2));
+ pkt_mb[0] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[0]), shuf_msk1));
+ pkt_mb[1] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[1]), len_adjust));
+ pkt_mb[0] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[0]), len_adjust));
+ vst1q_u64((void *)&rx_pkts[1]->rx_descriptor_fields1,
+ pkt_mb[1]);
+ vst1q_u64((void *)&rx_pkts[0]->rx_descriptor_fields1,
+ pkt_mb[0]);
+
+ pkt_mb[3] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[1]), shuf_msk2));
+ pkt_mb[2] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[1]), shuf_msk1));
+ pkt_mb[3] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[3]), len_adjust));
+ pkt_mb[2] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[2]), len_adjust));
+ vst1q_u64((void *)&rx_pkts[3]->rx_descriptor_fields1,
+ pkt_mb[3]);
+ vst1q_u64((void *)&rx_pkts[2]->rx_descriptor_fields1,
+ pkt_mb[2]);
+
+ pkt_mb[5] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[2]), shuf_msk2));
+ pkt_mb[4] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[2]), shuf_msk1));
+ pkt_mb[5] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[5]), len_adjust));
+ pkt_mb[4] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[4]), len_adjust));
+ vst1q_u64((void *)&rx_pkts[5]->rx_descriptor_fields1,
+ pkt_mb[5]);
+ vst1q_u64((void *)&rx_pkts[4]->rx_descriptor_fields1,
+ pkt_mb[4]);
+
+ pkt_mb[7] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[3]), shuf_msk2));
+ pkt_mb[6] = vreinterpretq_u64_u8(vqtbl1q_u8(
+ vreinterpretq_u8_u64(desc[3]), shuf_msk1));
+ pkt_mb[7] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[7]), len_adjust));
+ pkt_mb[6] = vreinterpretq_u64_u16(vsubq_u16(
+ vreinterpretq_u16_u64(pkt_mb[6]), len_adjust));
+ vst1q_u64((void *)&rx_pkts[7]->rx_descriptor_fields1,
+ pkt_mb[7]);
+ vst1q_u64((void *)&rx_pkts[6]->rx_descriptor_fields1,
+ pkt_mb[6]);
+
+ if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
+ if (sw_ring + nb_used <= sw_ring_end)
+ nb_pkts_received += nb_used;
+ else
+ nb_pkts_received += sw_ring_end - sw_ring;
+ break;
+ } else {
+ if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
+ sw_ring_end)) {
+ nb_pkts_received += sw_ring_end - sw_ring;
+ break;
+ } else {
+ nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
+
+ rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
+ sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
+ rused += RTE_VIRTIO_DESC_PER_LOOP;
+ nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
+ }
+ }
+ }
+
+ vq->vq_used_cons_idx += nb_pkts_received;
+ vq->vq_free_cnt += nb_pkts_received;
+ rxvq->stats.packets += nb_pkts_received;
+ return nb_pkts_received;
+}
--
2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v3 4/4] virtio: add neon support
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 4/4] virtio: add neon support Jerin Jacob
@ 2016-07-06 3:11 ` Jianbo Liu
0 siblings, 0 replies; 44+ messages in thread
From: Jianbo Liu @ 2016-07-06 3:11 UTC (permalink / raw)
To: Jerin Jacob
Cc: dev, Thomas Monjalon, Bruce Richardson, huawei.xie, yuanhan.liu
On 5 July 2016 at 20:49, Jerin Jacob <jerin.jacob@caviumnetworks.com> wrote:
> Added neon based Rx vector implementation.
> Selection of the new handler based neon availability at runtime.
> Updated the release notes and MAINTAINERS file.
>
> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> ---
> MAINTAINERS | 1 +
> doc/guides/rel_notes/release_16_07.rst | 2 +
> drivers/net/virtio/Makefile | 2 +
> drivers/net/virtio/virtio_rxtx.c | 3 +
> drivers/net/virtio/virtio_rxtx_simple_neon.c | 235 +++++++++++++++++++++++++++
> 5 files changed, 243 insertions(+)
> create mode 100644 drivers/net/virtio/virtio_rxtx_simple_neon.c
>
Acked-by: Jianbo Liu <jianbo.liu@linaro.org>
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v3 2/4] virtio: move SSE based Rx implementation to separate file
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 2/4] virtio: move SSE based Rx implementation to separate file Jerin Jacob
@ 2016-08-18 6:52 ` Yuanhan Liu
2016-08-19 3:24 ` Jerin Jacob
0 siblings, 1 reply; 44+ messages in thread
From: Yuanhan Liu @ 2016-08-18 6:52 UTC (permalink / raw)
To: Jerin Jacob
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Tue, Jul 05, 2016 at 06:19:24PM +0530, Jerin Jacob wrote:
> Split out SSE instruction based virtio simple Rx
> implementation to a separate file
>
> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
Hi,
I was about to apply this set. I then did some build test and found a
weird issue: it breaks the build with clang (ubuntu 16.04).
drivers/net/virtio/virtio_rxtx_simple_sse.c:130:2: error: cast from 'const void *' to 'void *' drops const qualifier [-Werror,-Wcast-qual]
_mm_prefetch((const void *)rused, _MM_HINT_T0);
^
/usr/lib/llvm-3.8/bin/../lib/clang/3.8.0/include/xmmintrin.h:684:58: note: expanded from macro '_mm_prefetch'
#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel)))
^
1 error generated.
Weird enough I don't see this issue before this commit: the error
line is exactly the same before and after this commit.
Another note is that _mm_prefetch() is actually with different prototype
for gcc and clang. For gcc, we have:
_mm_prefetch (const void *__P, enum _mm_hint __I)
Any thoughts?
--yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v3 2/4] virtio: move SSE based Rx implementation to separate file
2016-08-18 6:52 ` Yuanhan Liu
@ 2016-08-19 3:24 ` Jerin Jacob
2016-08-23 7:43 ` Yuanhan Liu
0 siblings, 1 reply; 44+ messages in thread
From: Jerin Jacob @ 2016-08-19 3:24 UTC (permalink / raw)
To: Yuanhan Liu
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Thu, Aug 18, 2016 at 02:52:31PM +0800, Yuanhan Liu wrote:
> On Tue, Jul 05, 2016 at 06:19:24PM +0530, Jerin Jacob wrote:
> > Split out SSE instruction based virtio simple Rx
> > implementation to a separate file
> >
> > Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
>
> Hi,
>
> I was about to apply this set. I then did some build test and found a
> weird issue: it breaks the build with clang (ubuntu 16.04).
>
> drivers/net/virtio/virtio_rxtx_simple_sse.c:130:2: error: cast from 'const void *' to 'void *' drops const qualifier [-Werror,-Wcast-qual]
> _mm_prefetch((const void *)rused, _MM_HINT_T0);
> ^
> /usr/lib/llvm-3.8/bin/../lib/clang/3.8.0/include/xmmintrin.h:684:58: note: expanded from macro '_mm_prefetch'
> #define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel)))
> ^
> 1 error generated.
>
> Weird enough I don't see this issue before this commit: the error
> line is exactly the same before and after this commit.
Yes, I looked at the pre processed output as well, it comes as same before and
after this commit.
>
> Another note is that _mm_prefetch() is actually with different prototype
> for gcc and clang. For gcc, we have:
>
> _mm_prefetch (const void *__P, enum _mm_hint __I)
>
> Any thoughts?
How about replacing "_mm_prefetch((const void *)rused, _MM_HINT_T0)"
with "rte_prefetch0(rused)" to have same prototype and fix the issue
with clang?
>
> --yliu
>
>
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v3 2/4] virtio: move SSE based Rx implementation to separate file
2016-08-19 3:24 ` Jerin Jacob
@ 2016-08-23 7:43 ` Yuanhan Liu
0 siblings, 0 replies; 44+ messages in thread
From: Yuanhan Liu @ 2016-08-23 7:43 UTC (permalink / raw)
To: Jerin Jacob
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Fri, Aug 19, 2016 at 08:54:00AM +0530, Jerin Jacob wrote:
> On Thu, Aug 18, 2016 at 02:52:31PM +0800, Yuanhan Liu wrote:
> > On Tue, Jul 05, 2016 at 06:19:24PM +0530, Jerin Jacob wrote:
> > > Split out SSE instruction based virtio simple Rx
> > > implementation to a separate file
> > >
> > > Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> >
> > Hi,
> >
> > I was about to apply this set. I then did some build test and found a
> > weird issue: it breaks the build with clang (ubuntu 16.04).
> >
> > drivers/net/virtio/virtio_rxtx_simple_sse.c:130:2: error: cast from 'const void *' to 'void *' drops const qualifier [-Werror,-Wcast-qual]
> > _mm_prefetch((const void *)rused, _MM_HINT_T0);
> > ^
> > /usr/lib/llvm-3.8/bin/../lib/clang/3.8.0/include/xmmintrin.h:684:58: note: expanded from macro '_mm_prefetch'
> > #define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel)))
> > ^
> > 1 error generated.
> >
> > Weird enough I don't see this issue before this commit: the error
> > line is exactly the same before and after this commit.
>
> Yes, I looked at the pre processed output as well, it comes as same before and
> after this commit.
>
> >
> > Another note is that _mm_prefetch() is actually with different prototype
> > for gcc and clang. For gcc, we have:
> >
> > _mm_prefetch (const void *__P, enum _mm_hint __I)
> >
> > Any thoughts?
>
> How about replacing "_mm_prefetch((const void *)rused, _MM_HINT_T0)"
> with "rte_prefetch0(rused)" to have same prototype and fix the issue
> with clang?
Yes, it should work. BTW, I have just sent out a patch for that. I will
apply yours when that patch has been applied.
--yliu
^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [dpdk-dev] [PATCH v3 0/4] Virtio NEON support for ARM
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 0/4] Virtio NEON support for ARM Jerin Jacob
` (3 preceding siblings ...)
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 4/4] virtio: add neon support Jerin Jacob
@ 2016-09-28 0:37 ` Yuanhan Liu
4 siblings, 0 replies; 44+ messages in thread
From: Yuanhan Liu @ 2016-09-28 0:37 UTC (permalink / raw)
To: Jerin Jacob
Cc: dev, thomas.monjalon, bruce.richardson, jianbo.liu, huawei.xie
On Tue, Jul 05, 2016 at 06:19:22PM +0530, Jerin Jacob wrote:
> This patch-set includes,
>
> 1) General cleanup of compile time dependency.
> 2) made vector handler section based on run-time cpuflags
> 2) Added NEON support for optimized Rx handling
>
> This patch-set is based on dpdk-next-virtio/master
Now the weird build issue is gone, this series is applied to
dpdk-next-virtio.
I addressed some conflict issues; you might want to have a
simple check to make sure I don't mess something up. Or even
better, if you could do a test.
Thanks.
--yliu
>
> v3:
> Address Yuanhan's review comments
> http://dpdk.org/dev/patchwork/patch/14495/
> http://dpdk.org/dev/patchwork/patch/14496/
>
> v2:
> - made vector handler selection based on run-time cpuflags (Suggested by Thomas)
> - moved vector implementations to .c file instead of .h file(Suggested by Jianbo)
>
> Jerin Jacob (4):
> virtio: conditional compilation cleanup
> virtio: move SSE based Rx implementation to separate file
> virtio: add cpuflag based vector handler selection
> virtio: add neon support
>
> MAINTAINERS | 1 +
> doc/guides/rel_notes/release_16_07.rst | 2 +
> drivers/net/virtio/Makefile | 7 +-
> drivers/net/virtio/virtio_pci.h | 1 +
> drivers/net/virtio/virtio_rxtx.c | 63 ++++---
> drivers/net/virtio/virtio_rxtx.h | 3 +-
> drivers/net/virtio/virtio_rxtx_simple.c | 269 ++-------------------------
> drivers/net/virtio/virtio_rxtx_simple.h | 133 +++++++++++++
> drivers/net/virtio/virtio_rxtx_simple_neon.c | 235 +++++++++++++++++++++++
> drivers/net/virtio/virtio_rxtx_simple_sse.c | 222 ++++++++++++++++++++++
> drivers/net/virtio/virtio_user_ethdev.c | 1 +
> 11 files changed, 646 insertions(+), 291 deletions(-)
> create mode 100644 drivers/net/virtio/virtio_rxtx_simple.h
> create mode 100644 drivers/net/virtio/virtio_rxtx_simple_neon.c
> create mode 100644 drivers/net/virtio/virtio_rxtx_simple_sse.c
>
> --
> 2.5.5
^ permalink raw reply [flat|nested] 44+ messages in thread
end of thread, other threads:[~2016-09-28 0:36 UTC | newest]
Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-27 11:54 [dpdk-dev] [PATCH 0/4] Virtio NEON support for ARM Jerin Jacob
2016-06-27 11:54 ` [dpdk-dev] [PATCH 1/4] virtio: Fix compile time dependency of use_simple_rxtx usage Jerin Jacob
2016-06-27 11:54 ` [dpdk-dev] [PATCH 2/4] virtio: introduce RTE_LIBRTE_VIRTIO_INC_VECTOR Jerin Jacob
2016-06-27 14:19 ` Thomas Monjalon
2016-06-27 14:48 ` Jerin Jacob
2016-06-27 14:59 ` Thomas Monjalon
2016-06-29 11:18 ` Jerin Jacob
2016-06-29 11:25 ` Thomas Monjalon
2016-06-29 11:40 ` Jerin Jacob
2016-06-30 5:44 ` Yuanhan Liu
2016-06-27 11:54 ` [dpdk-dev] [PATCH 3/4] virtio: move SSE based Rx implementation to separate file Jerin Jacob
2016-06-28 6:17 ` Jianbo Liu
2016-06-29 11:27 ` Jerin Jacob
2016-06-30 5:43 ` Yuanhan Liu
2016-06-27 11:54 ` [dpdk-dev] [PATCH 4/4] virtio: add neon support Jerin Jacob
2016-07-01 11:16 ` [dpdk-dev] From: Jerin Jacob <jerin.jacob@caviumnetworks.com> Jerin Jacob
2016-07-01 11:16 ` [dpdk-dev] [PATCH v2 1/3] virtio: conditional compilation cleanup Jerin Jacob
2016-07-04 7:36 ` Yuanhan Liu
2016-07-04 8:36 ` Jerin Jacob
2016-07-04 8:42 ` Yuanhan Liu
2016-07-04 9:07 ` Jerin Jacob
2016-07-04 11:02 ` Yuanhan Liu
2016-07-04 12:15 ` Jerin Jacob
2016-07-04 12:26 ` Yuanhan Liu
2016-07-04 12:50 ` Jerin Jacob
2016-07-04 12:57 ` Yuanhan Liu
2016-07-01 11:16 ` [dpdk-dev] [PATCH v2 2/3] virtio: move SSE based Rx implementation to separate file Jerin Jacob
2016-07-04 7:42 ` Yuanhan Liu
2016-07-04 8:38 ` Jerin Jacob
2016-07-01 11:16 ` [dpdk-dev] [PATCH v2 3/3] virtio: add neon support Jerin Jacob
2016-07-04 7:53 ` Yuanhan Liu
2016-07-04 8:55 ` Jerin Jacob
2016-07-04 9:02 ` Yuanhan Liu
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 0/4] Virtio NEON support for ARM Jerin Jacob
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 1/4] virtio: conditional compilation cleanup Jerin Jacob
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 2/4] virtio: move SSE based Rx implementation to separate file Jerin Jacob
2016-08-18 6:52 ` Yuanhan Liu
2016-08-19 3:24 ` Jerin Jacob
2016-08-23 7:43 ` Yuanhan Liu
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 3/4] virtio: add cpuflag based vector handler selection Jerin Jacob
2016-07-05 12:49 ` [dpdk-dev] [PATCH v3 4/4] virtio: add neon support Jerin Jacob
2016-07-06 3:11 ` Jianbo Liu
2016-09-28 0:37 ` [dpdk-dev] [PATCH v3 0/4] Virtio NEON support for ARM Yuanhan Liu
2016-07-01 11:19 ` [dpdk-dev] [PATCH v2 0/3] " Jerin Jacob
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).