From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: Marvin Liu <yong.liu@intel.com>,
xiaolong.ye@intel.com, zhihong.wang@intel.com
Cc: dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH v10 6/9] net/virtio: add vectorized packed ring Rx path
Date: Mon, 27 Apr 2020 13:20:53 +0200 [thread overview]
Message-ID: <672a584a-46d1-c78b-7b21-9ed7bc060814@redhat.com> (raw)
In-Reply-To: <20200426021943.43158-7-yong.liu@intel.com>
On 4/26/20 4:19 AM, Marvin Liu wrote:
> Optimize packed ring Rx path with SIMD instructions. Solution of
> optimization is pretty like vhost, is that split path into batch and
> single functions. Batch function is further optimized by AVX512
> instructions. Also pad desc extra structure to 16 bytes aligned, thus
> four elements will be saved in one batch.
>
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
>
> diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
> index c9edb84ee..102b1deab 100644
> --- a/drivers/net/virtio/Makefile
> +++ b/drivers/net/virtio/Makefile
> @@ -36,6 +36,41 @@ else ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)
> SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple_neon.c
> endif
>
> +ifneq ($(FORCE_DISABLE_AVX512), y)
> + CC_AVX512_SUPPORT=\
> + $(shell $(CC) -march=native -dM -E - </dev/null 2>&1 | \
> + sed '/./{H;$$!d} ; x ; /AVX512F/!d; /AVX512BW/!d; /AVX512VL/!d' | \
> + grep -q AVX512 && echo 1)
> +endif
> +
> +ifeq ($(CC_AVX512_SUPPORT), 1)
> +CFLAGS += -DCC_AVX512_SUPPORT
> +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_packed_avx.c
> +
> +ifeq ($(RTE_TOOLCHAIN), gcc)
> +ifeq ($(shell test $(GCC_VERSION) -ge 83 && echo 1), 1)
> +CFLAGS += -DVIRTIO_GCC_UNROLL_PRAGMA
> +endif
> +endif
> +
> +ifeq ($(RTE_TOOLCHAIN), clang)
> +ifeq ($(shell test $(CLANG_MAJOR_VERSION)$(CLANG_MINOR_VERSION) -ge 37 && echo 1), 1)
> +CFLAGS += -DVIRTIO_CLANG_UNROLL_PRAGMA
> +endif
> +endif
> +
> +ifeq ($(RTE_TOOLCHAIN), icc)
> +ifeq ($(shell test $(ICC_MAJOR_VERSION) -ge 16 && echo 1), 1)
> +CFLAGS += -DVIRTIO_ICC_UNROLL_PRAGMA
> +endif
> +endif
> +
> +CFLAGS_virtio_rxtx_packed_avx.o += -mavx512f -mavx512bw -mavx512vl
> +ifeq ($(shell test $(GCC_VERSION) -ge 100 && echo 1), 1)
> +CFLAGS_virtio_rxtx_packed_avx.o += -Wno-zero-length-bounds
> +endif
> +endif
> +
> ifeq ($(CONFIG_RTE_VIRTIO_USER),y)
> SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c
> SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c
> diff --git a/drivers/net/virtio/meson.build b/drivers/net/virtio/meson.build
> index 15150eea1..8e68c3039 100644
> --- a/drivers/net/virtio/meson.build
> +++ b/drivers/net/virtio/meson.build
> @@ -9,6 +9,20 @@ sources += files('virtio_ethdev.c',
> deps += ['kvargs', 'bus_pci']
>
> if arch_subdir == 'x86'
> + if '-mno-avx512f' not in machine_args
> + if cc.has_argument('-mavx512f') and cc.has_argument('-mavx512vl') and cc.has_argument('-mavx512bw')
> + cflags += ['-mavx512f', '-mavx512bw', '-mavx512vl']
> + cflags += ['-DCC_AVX512_SUPPORT']
> + if (toolchain == 'gcc' and cc.version().version_compare('>=8.3.0'))
> + cflags += '-DVHOST_GCC_UNROLL_PRAGMA'
> + elif (toolchain == 'clang' and cc.version().version_compare('>=3.7.0'))
> + cflags += '-DVHOST_CLANG_UNROLL_PRAGMA'
> + elif (toolchain == 'icc' and cc.version().version_compare('>=16.0.0'))
> + cflags += '-DVHOST_ICC_UNROLL_PRAGMA'
> + endif
> + sources += files('virtio_rxtx_packed_avx.c')
> + endif
> + endif
> sources += files('virtio_rxtx_simple_sse.c')
> elif arch_subdir == 'ppc'
> sources += files('virtio_rxtx_simple_altivec.c')
> diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
> index febaf17a8..5c112cac7 100644
> --- a/drivers/net/virtio/virtio_ethdev.h
> +++ b/drivers/net/virtio/virtio_ethdev.h
> @@ -105,6 +105,9 @@ uint16_t virtio_xmit_pkts_inorder(void *tx_queue, struct rte_mbuf **tx_pkts,
> uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> uint16_t nb_pkts);
>
> +uint16_t virtio_recv_pkts_packed_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> + uint16_t nb_pkts);
> +
> int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
>
> void virtio_interrupt_handler(void *param);
> diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> index a549991aa..534562cca 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -2030,3 +2030,11 @@ virtio_xmit_pkts_inorder(void *tx_queue,
>
> return nb_tx;
> }
> +
> +__rte_weak uint16_t
> +virtio_recv_pkts_packed_vec(void *rx_queue __rte_unused,
> + struct rte_mbuf **rx_pkts __rte_unused,
> + uint16_t nb_pkts __rte_unused)
> +{
> + return 0;
> +}
> diff --git a/drivers/net/virtio/virtio_rxtx_packed_avx.c b/drivers/net/virtio/virtio_rxtx_packed_avx.c
> new file mode 100644
> index 000000000..8a7b459eb
> --- /dev/null
> +++ b/drivers/net/virtio/virtio_rxtx_packed_avx.c
> @@ -0,0 +1,374 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2010-2020 Intel Corporation
> + */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <errno.h>
> +
> +#include <rte_net.h>
> +
> +#include "virtio_logs.h"
> +#include "virtio_ethdev.h"
> +#include "virtio_pci.h"
> +#include "virtqueue.h"
> +
> +#define BYTE_SIZE 8
> +/* flag bits offset in packed ring desc higher 64bits */
> +#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
> + offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)
> +
> +#define PACKED_FLAGS_MASK ((0ULL | VRING_PACKED_DESC_F_AVAIL_USED) << \
> + FLAGS_BITS_OFFSET)
> +
> +#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
> + sizeof(struct vring_packed_desc))
> +#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)
> +
> +#ifdef VIRTIO_GCC_UNROLL_PRAGMA
> +#define virtio_for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \
> + for (iter = val; iter < size; iter++)
> +#endif
> +
> +#ifdef VIRTIO_CLANG_UNROLL_PRAGMA
> +#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
> + for (iter = val; iter < size; iter++)
> +#endif
> +
> +#ifdef VIRTIO_ICC_UNROLL_PRAGMA
> +#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll (4)") \
> + for (iter = val; iter < size; iter++)
> +#endif
> +
> +#ifndef virtio_for_each_try_unroll
> +#define virtio_for_each_try_unroll(iter, val, num) \
> + for (iter = val; iter < num; iter++)
> +#endif
> +
> +static inline void
> +virtio_update_batch_stats(struct virtnet_stats *stats,
> + uint16_t pkt_len1,
> + uint16_t pkt_len2,
> + uint16_t pkt_len3,
> + uint16_t pkt_len4)
> +{
> + stats->bytes += pkt_len1;
> + stats->bytes += pkt_len2;
> + stats->bytes += pkt_len3;
> + stats->bytes += pkt_len4;
> +}
> +
> +/* Optionally fill offload information in structure */
> +static inline int
> +virtio_vec_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
> +{
> + struct rte_net_hdr_lens hdr_lens;
> + uint32_t hdrlen, ptype;
> + int l4_supported = 0;
> +
> + /* nothing to do */
> + if (hdr->flags == 0)
> + return 0;
> +
> + /* GSO not support in vec path, skip check */
> + m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
> +
> + ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
> + m->packet_type = ptype;
> + if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
> + (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
> + (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
> + l4_supported = 1;
> +
> + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
> + hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
> + if (hdr->csum_start <= hdrlen && l4_supported) {
> + m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
> + } else {
> + /* Unknown proto or tunnel, do sw cksum. We can assume
> + * the cksum field is in the first segment since the
> + * buffers we provided to the host are large enough.
> + * In case of SCTP, this will be wrong since it's a CRC
> + * but there's nothing we can do.
> + */
> + uint16_t csum = 0, off;
> +
> + rte_raw_cksum_mbuf(m, hdr->csum_start,
> + rte_pktmbuf_pkt_len(m) - hdr->csum_start,
> + &csum);
> + if (likely(csum != 0xffff))
> + csum = ~csum;
> + off = hdr->csum_offset + hdr->csum_start;
> + if (rte_pktmbuf_data_len(m) >= off + 1)
> + *rte_pktmbuf_mtod_offset(m, uint16_t *,
> + off) = csum;
> + }
> + } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
> + m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
> + }
> +
> + return 0;
> +}
> +
> +static inline uint16_t
> +virtqueue_dequeue_batch_packed_vec(struct virtnet_rx *rxvq,
> + struct rte_mbuf **rx_pkts)
> +{
> + struct virtqueue *vq = rxvq->vq;
> + struct virtio_hw *hw = vq->hw;
> + uint16_t hdr_size = hw->vtnet_hdr_size;
> + uint64_t addrs[PACKED_BATCH_SIZE];
> + uint16_t id = vq->vq_used_cons_idx;
> + uint8_t desc_stats;
> + uint16_t i;
> + void *desc_addr;
> +
> + if (id & PACKED_BATCH_MASK)
> + return -1;
> +
> + if (unlikely((id + PACKED_BATCH_SIZE) > vq->vq_nentries))
> + return -1;
> +
> + /* only care avail/used bits */
> + __m512i v_mask = _mm512_maskz_set1_epi64(0xaa, PACKED_FLAGS_MASK);
> + desc_addr = &vq->vq_packed.ring.desc[id];
> +
> + __m512i v_desc = _mm512_loadu_si512(desc_addr);
> + __m512i v_flag = _mm512_and_epi64(v_desc, v_mask);
> +
> + __m512i v_used_flag = _mm512_setzero_si512();
> + if (vq->vq_packed.used_wrap_counter)
> + v_used_flag = _mm512_maskz_set1_epi64(0xaa, PACKED_FLAGS_MASK);
> +
> + /* Check all descs are used */
> + desc_stats = _mm512_cmpneq_epu64_mask(v_flag, v_used_flag);
> + if (desc_stats)
> + return -1;
> +
> + virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
> + rx_pkts[i] = (struct rte_mbuf *)vq->vq_descx[id + i].cookie;
> + rte_packet_prefetch(rte_pktmbuf_mtod(rx_pkts[i], void *));
> +
> + addrs[i] = (uint64_t)rx_pkts[i]->rx_descriptor_fields1;
> + }
> +
> + /*
> + * load len from desc, store into mbuf pkt_len and data_len
> + * len limiated by l6bit buf_len, pkt_len[16:31] can be ignored
> + */
> + const __mmask16 mask = 0x6 | 0x6 << 4 | 0x6 << 8 | 0x6 << 12;
> + __m512i values = _mm512_maskz_shuffle_epi32(mask, v_desc, 0xAA);
> +
> + /* reduce hdr_len from pkt_len and data_len */
> + __m512i mbuf_len_offset = _mm512_maskz_set1_epi32(mask,
> + (uint32_t)-hdr_size);
> +
> + __m512i v_value = _mm512_add_epi32(values, mbuf_len_offset);
> +
> + /* assert offset of data_len */
> + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
> + offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
> +
> + __m512i v_index = _mm512_set_epi64(addrs[3] + 8, addrs[3],
> + addrs[2] + 8, addrs[2],
> + addrs[1] + 8, addrs[1],
> + addrs[0] + 8, addrs[0]);
> + /* batch store into mbufs */
> + _mm512_i64scatter_epi64(0, v_index, v_value, 1);
> +
> + if (hw->has_rx_offload) {
> + virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
> + char *addr = (char *)rx_pkts[i]->buf_addr +
> + RTE_PKTMBUF_HEADROOM - hdr_size;
> + virtio_vec_rx_offload(rx_pkts[i],
> + (struct virtio_net_hdr *)addr);
> + }
> + }
> +
> + virtio_update_batch_stats(&rxvq->stats, rx_pkts[0]->pkt_len,
> + rx_pkts[1]->pkt_len, rx_pkts[2]->pkt_len,
> + rx_pkts[3]->pkt_len);
> +
> + vq->vq_free_cnt += PACKED_BATCH_SIZE;
> +
> + vq->vq_used_cons_idx += PACKED_BATCH_SIZE;
> + if (vq->vq_used_cons_idx >= vq->vq_nentries) {
> + vq->vq_used_cons_idx -= vq->vq_nentries;
> + vq->vq_packed.used_wrap_counter ^= 1;
> + }
> +
> + return 0;
> +}
> +
> +static uint16_t
> +virtqueue_dequeue_single_packed_vec(struct virtnet_rx *rxvq,
> + struct rte_mbuf **rx_pkts)
> +{
> + uint16_t used_idx, id;
> + uint32_t len;
> + struct virtqueue *vq = rxvq->vq;
> + struct virtio_hw *hw = vq->hw;
> + uint32_t hdr_size = hw->vtnet_hdr_size;
> + struct virtio_net_hdr *hdr;
> + struct vring_packed_desc *desc;
> + struct rte_mbuf *cookie;
> +
> + desc = vq->vq_packed.ring.desc;
> + used_idx = vq->vq_used_cons_idx;
> + if (!desc_is_used(&desc[used_idx], vq))
> + return -1;
> +
> + len = desc[used_idx].len;
> + id = desc[used_idx].id;
> + cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
> + if (unlikely(cookie == NULL)) {
> + PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
> + vq->vq_used_cons_idx);
> + return -1;
> + }
> + rte_prefetch0(cookie);
> + rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
> +
> + cookie->data_off = RTE_PKTMBUF_HEADROOM;
> + cookie->ol_flags = 0;
> + cookie->pkt_len = (uint32_t)(len - hdr_size);
> + cookie->data_len = (uint32_t)(len - hdr_size);
> +
> + hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
> + RTE_PKTMBUF_HEADROOM - hdr_size);
> + if (hw->has_rx_offload)
> + virtio_vec_rx_offload(cookie, hdr);
> +
> + *rx_pkts = cookie;
> +
> + rxvq->stats.bytes += cookie->pkt_len;
> +
> + vq->vq_free_cnt++;
> + vq->vq_used_cons_idx++;
> + if (vq->vq_used_cons_idx >= vq->vq_nentries) {
> + vq->vq_used_cons_idx -= vq->vq_nentries;
> + vq->vq_packed.used_wrap_counter ^= 1;
> + }
> +
> + return 0;
> +}
> +
> +static inline void
> +virtio_recv_refill_packed_vec(struct virtnet_rx *rxvq,
> + struct rte_mbuf **cookie,
> + uint16_t num)
> +{
> + struct virtqueue *vq = rxvq->vq;
> + struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
> + uint16_t flags = vq->vq_packed.cached_flags;
> + struct virtio_hw *hw = vq->hw;
> + struct vq_desc_extra *dxp;
> + uint16_t idx, i;
> + uint16_t batch_num, total_num = 0;
> + uint16_t head_idx = vq->vq_avail_idx;
> + uint16_t head_flag = vq->vq_packed.cached_flags;
> + uint64_t addr;
> +
> + do {
> + idx = vq->vq_avail_idx;
> +
> + batch_num = PACKED_BATCH_SIZE;
> + if (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries))
> + batch_num = vq->vq_nentries - idx;
> + if (unlikely((total_num + batch_num) > num))
> + batch_num = num - total_num;
> +
> + virtio_for_each_try_unroll(i, 0, batch_num) {
> + dxp = &vq->vq_descx[idx + i];
> + dxp->cookie = (void *)cookie[total_num + i];
> +
> + addr = VIRTIO_MBUF_ADDR(cookie[total_num + i], vq) +
> + RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
> + start_dp[idx + i].addr = addr;
> + start_dp[idx + i].len = cookie[total_num + i]->buf_len
> + - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
> + if (total_num || i) {
> + virtqueue_store_flags_packed(&start_dp[idx + i],
> + flags, hw->weak_barriers);
> + }
> + }
> +
> + vq->vq_avail_idx += batch_num;
> + if (vq->vq_avail_idx >= vq->vq_nentries) {
> + vq->vq_avail_idx -= vq->vq_nentries;
> + vq->vq_packed.cached_flags ^=
> + VRING_PACKED_DESC_F_AVAIL_USED;
> + flags = vq->vq_packed.cached_flags;
> + }
> + total_num += batch_num;
> + } while (total_num < num);
> +
> + virtqueue_store_flags_packed(&start_dp[head_idx], head_flag,
> + hw->weak_barriers);
> + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
> +}
> +
> +uint16_t
> +virtio_recv_pkts_packed_vec(void *rx_queue,
> + struct rte_mbuf **rx_pkts,
> + uint16_t nb_pkts)
> +{
> + struct virtnet_rx *rxvq = rx_queue;
> + struct virtqueue *vq = rxvq->vq;
> + struct virtio_hw *hw = vq->hw;
> + uint16_t num, nb_rx = 0;
> + uint32_t nb_enqueued = 0;
> + uint16_t free_cnt = vq->vq_free_thresh;
> +
> + if (unlikely(hw->started == 0))
> + return nb_rx;
> +
> + num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
> + if (likely(num > PACKED_BATCH_SIZE))
> + num = num - ((vq->vq_used_cons_idx + num) % PACKED_BATCH_SIZE);
> +
> + while (num) {
> + if (!virtqueue_dequeue_batch_packed_vec(rxvq,
> + &rx_pkts[nb_rx])) {
> + nb_rx += PACKED_BATCH_SIZE;
> + num -= PACKED_BATCH_SIZE;
> + continue;
> + }
> + if (!virtqueue_dequeue_single_packed_vec(rxvq,
> + &rx_pkts[nb_rx])) {
> + nb_rx++;
> + num--;
> + continue;
> + }
> + break;
> + };
> +
> + PMD_RX_LOG(DEBUG, "dequeue:%d", num);
> +
> + rxvq->stats.packets += nb_rx;
> +
> + if (likely(vq->vq_free_cnt >= free_cnt)) {
> + struct rte_mbuf *new_pkts[free_cnt];
> + if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
> + free_cnt) == 0)) {
> + virtio_recv_refill_packed_vec(rxvq, new_pkts,
> + free_cnt);
> + nb_enqueued += free_cnt;
> + } else {
> + struct rte_eth_dev *dev =
> + &rte_eth_devices[rxvq->port_id];
> + dev->data->rx_mbuf_alloc_failed += free_cnt;
> + }
> + }
> +
> + if (likely(nb_enqueued)) {
> + if (unlikely(virtqueue_kick_prepare_packed(vq))) {
> + virtqueue_notify(vq);
> + PMD_RX_LOG(DEBUG, "Notified");
> + }
> + }
> +
> + return nb_rx;
> +}
> diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
> index 40ad786cc..c54698ad1 100644
> --- a/drivers/net/virtio/virtio_user_ethdev.c
> +++ b/drivers/net/virtio/virtio_user_ethdev.c
> @@ -528,6 +528,7 @@ virtio_user_eth_dev_alloc(struct rte_vdev_device *vdev)
> hw->use_msix = 1;
> hw->modern = 0;
> hw->use_vec_rx = 0;
> + hw->use_vec_tx = 0;
> hw->use_inorder_rx = 0;
> hw->use_inorder_tx = 0;
> hw->virtio_user_dev = dev;
> @@ -739,8 +740,19 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
> goto end;
> }
>
> - if (vectorized)
> - hw->use_vec_rx = 1;
> + if (vectorized) {
> + if (packed_vq) {
> +#if defined(CC_AVX512_SUPPORT)
> + hw->use_vec_rx = 1;
> + hw->use_vec_tx = 1;
> +#else
> + PMD_INIT_LOG(INFO,
> + "building environment do not support packed ring vectorized");
> +#endif
> + } else {
> + hw->use_vec_rx = 1;
> + }
> + }
>
> rte_eth_dev_probing_finish(eth_dev);
> ret = 0;
> diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
> index ca1c10499..ce0340743 100644
> --- a/drivers/net/virtio/virtqueue.h
> +++ b/drivers/net/virtio/virtqueue.h
> @@ -239,7 +239,8 @@ struct vq_desc_extra {
> void *cookie;
> uint16_t ndescs;
> uint16_t next;
> -};
> + uint8_t padding[4];
> +} __rte_packed __rte_aligned(16);
Can't this introduce a performance impact for the non-vectorized
case? I think of worse cache liens utilization.
For example with a burst of 32 descriptors with 32B cachelines, before
it would take 14 cachelines, after 16. So for each burst, one could face
2 extra cache misses.
If you could run non-vectorized benchamrks with and without that patch,
I would be grateful.
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Thanks,
Maxime
next prev parent reply other threads:[~2020-04-27 11:21 UTC|newest]
Thread overview: 162+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-03-13 17:42 [dpdk-dev] [PATCH v1 0/7] vectorize virtio packed ring datapath Marvin Liu
2020-03-13 17:42 ` [dpdk-dev] [PATCH v1 1/7] net/virtio: add Rx free threshold setting Marvin Liu
2020-03-13 17:42 ` [dpdk-dev] [PATCH v1 2/7] net/virtio-user: add LRO parameter Marvin Liu
2020-03-13 17:42 ` [dpdk-dev] [PATCH v1 3/7] net/virtio: add vectorized packed ring Rx function Marvin Liu
2020-03-13 17:42 ` [dpdk-dev] [PATCH v1 4/7] net/virtio: reuse packed ring xmit functions Marvin Liu
2020-03-13 17:42 ` [dpdk-dev] [PATCH v1 5/7] net/virtio: add vectorized packed ring Tx function Marvin Liu
2020-03-13 17:42 ` [dpdk-dev] [PATCH v1 6/7] net/virtio: add election for vectorized datapath Marvin Liu
2020-03-13 17:42 ` [dpdk-dev] [PATCH v1 7/7] net/virtio: support meson build Marvin Liu
2020-03-27 16:54 ` [dpdk-dev] [PATCH v2 0/7] add packed ring vectorized datapath Marvin Liu
2020-03-27 16:54 ` [dpdk-dev] [PATCH v2 1/7] net/virtio: add Rx free threshold setting Marvin Liu
2020-03-27 16:54 ` [dpdk-dev] [PATCH v2 2/7] net/virtio-user: add vectorized packed ring parameter Marvin Liu
2020-03-27 16:54 ` [dpdk-dev] [PATCH v2 3/7] net/virtio: add vectorized packed ring Rx function Marvin Liu
2020-03-27 16:54 ` [dpdk-dev] [PATCH v2 4/7] net/virtio: reuse packed ring xmit functions Marvin Liu
2020-03-27 16:54 ` [dpdk-dev] [PATCH v2 5/7] net/virtio: add vectorized packed ring Tx datapath Marvin Liu
2020-03-27 16:54 ` [dpdk-dev] [PATCH v2 6/7] net/virtio: add election for vectorized datapath Marvin Liu
2020-03-27 16:54 ` [dpdk-dev] [PATCH v2 7/7] doc: add packed " Marvin Liu
2020-04-08 8:53 ` [dpdk-dev] [PATCH v3 0/7] add packed ring " Marvin Liu
2020-04-08 8:53 ` [dpdk-dev] [PATCH v3 1/7] net/virtio: add Rx free threshold setting Marvin Liu
2020-04-08 6:08 ` Ye Xiaolong
2020-04-08 8:53 ` [dpdk-dev] [PATCH v3 2/7] net/virtio-user: add vectorized packed ring parameter Marvin Liu
2020-04-08 6:22 ` Ye Xiaolong
2020-04-08 7:31 ` Liu, Yong
2020-04-08 8:53 ` [dpdk-dev] [PATCH v3 3/7] net/virtio: add vectorized packed ring Rx function Marvin Liu
2020-04-08 8:53 ` [dpdk-dev] [PATCH v3 4/7] net/virtio: reuse packed ring xmit functions Marvin Liu
2020-04-08 8:53 ` [dpdk-dev] [PATCH v3 5/7] net/virtio: add vectorized packed ring Tx datapath Marvin Liu
2020-04-08 8:53 ` [dpdk-dev] [PATCH v3 6/7] net/virtio: add election for vectorized datapath Marvin Liu
2020-04-08 8:53 ` [dpdk-dev] [PATCH v3 7/7] doc: add packed " Marvin Liu
2020-04-15 16:47 ` [dpdk-dev] [PATCH v4 0/8] add packed ring " Marvin Liu
2020-04-15 16:47 ` [dpdk-dev] [PATCH v4 1/8] net/virtio: enable " Marvin Liu
2020-04-15 16:47 ` [dpdk-dev] [PATCH v4 2/8] net/virtio-user: add vectorized datapath parameter Marvin Liu
2020-04-15 16:47 ` [dpdk-dev] [PATCH v4 3/8] net/virtio: add vectorized packed ring Rx function Marvin Liu
2020-04-15 16:47 ` [dpdk-dev] [PATCH v4 4/8] net/virtio: reuse packed ring xmit functions Marvin Liu
2020-04-15 16:47 ` [dpdk-dev] [PATCH v4 5/8] net/virtio: add vectorized packed ring Tx datapath Marvin Liu
2020-04-15 16:47 ` [dpdk-dev] [PATCH v4 6/8] eal/x86: identify AVX512 extensions flag Marvin Liu
2020-04-15 13:31 ` David Marchand
2020-04-15 14:57 ` Liu, Yong
2020-04-15 16:47 ` [dpdk-dev] [PATCH v4 7/8] net/virtio: add election for vectorized datapath Marvin Liu
2020-04-15 16:47 ` [dpdk-dev] [PATCH v4 8/8] doc: add packed " Marvin Liu
2020-04-16 15:31 ` [dpdk-dev] [PATCH v5 0/9] add packed ring vectorized path Marvin Liu
2020-04-16 15:31 ` [dpdk-dev] [PATCH v5 1/9] net/virtio: add Rx free threshold setting Marvin Liu
2020-04-16 15:31 ` [dpdk-dev] [PATCH v5 2/9] net/virtio: enable vectorized path Marvin Liu
2020-04-16 15:31 ` [dpdk-dev] [PATCH v5 3/9] net/virtio: inorder should depend on feature bit Marvin Liu
2020-04-16 15:31 ` [dpdk-dev] [PATCH v5 4/9] net/virtio-user: add vectorized path parameter Marvin Liu
2020-04-16 15:31 ` [dpdk-dev] [PATCH v5 5/9] net/virtio: add vectorized packed ring Rx path Marvin Liu
2020-04-16 15:31 ` [dpdk-dev] [PATCH v5 6/9] net/virtio: reuse packed ring xmit functions Marvin Liu
2020-04-16 15:31 ` [dpdk-dev] [PATCH v5 7/9] net/virtio: add vectorized packed ring Tx path Marvin Liu
2020-04-16 15:31 ` [dpdk-dev] [PATCH v5 8/9] net/virtio: add election for vectorized path Marvin Liu
2020-04-16 15:31 ` [dpdk-dev] [PATCH v5 9/9] doc: add packed " Marvin Liu
2020-04-16 22:24 ` [dpdk-dev] [PATCH v6 0/9] add packed ring " Marvin Liu
2020-04-16 22:24 ` [dpdk-dev] [PATCH v6 1/9] net/virtio: add Rx free threshold setting Marvin Liu
2020-04-16 22:24 ` [dpdk-dev] [PATCH v6 2/9] net/virtio: enable vectorized path Marvin Liu
2020-04-20 14:08 ` Maxime Coquelin
2020-04-21 6:43 ` Liu, Yong
2020-04-22 8:07 ` Liu, Yong
2020-04-16 22:24 ` [dpdk-dev] [PATCH v6 3/9] net/virtio: inorder should depend on feature bit Marvin Liu
2020-04-16 22:24 ` [dpdk-dev] [PATCH v6 4/9] net/virtio-user: add vectorized path parameter Marvin Liu
2020-04-16 22:24 ` [dpdk-dev] [PATCH v6 5/9] net/virtio: add vectorized packed ring Rx path Marvin Liu
2020-04-16 22:24 ` [dpdk-dev] [PATCH v6 6/9] net/virtio: reuse packed ring xmit functions Marvin Liu
2020-04-16 22:24 ` [dpdk-dev] [PATCH v6 7/9] net/virtio: add vectorized packed ring Tx path Marvin Liu
2020-04-16 22:24 ` [dpdk-dev] [PATCH v6 8/9] net/virtio: add election for vectorized path Marvin Liu
2020-04-16 22:24 ` [dpdk-dev] [PATCH v6 9/9] doc: add packed " Marvin Liu
2020-04-22 6:16 ` [dpdk-dev] [PATCH v7 0/9] add packed ring " Marvin Liu
2020-04-22 6:16 ` [dpdk-dev] [PATCH v7 1/9] net/virtio: add Rx free threshold setting Marvin Liu
2020-04-22 6:16 ` [dpdk-dev] [PATCH v7 2/9] net/virtio: enable vectorized path Marvin Liu
2020-04-22 6:16 ` [dpdk-dev] [PATCH v7 3/9] net/virtio: inorder should depend on feature bit Marvin Liu
2020-04-22 6:16 ` [dpdk-dev] [PATCH v7 4/9] net/virtio-user: add vectorized path parameter Marvin Liu
2020-04-22 6:16 ` [dpdk-dev] [PATCH v7 5/9] net/virtio: add vectorized packed ring Rx path Marvin Liu
2020-04-22 6:16 ` [dpdk-dev] [PATCH v7 6/9] net/virtio: reuse packed ring xmit functions Marvin Liu
2020-04-22 6:16 ` [dpdk-dev] [PATCH v7 7/9] net/virtio: add vectorized packed ring Tx path Marvin Liu
2020-04-22 6:16 ` [dpdk-dev] [PATCH v7 8/9] net/virtio: add election for vectorized path Marvin Liu
2020-04-22 6:16 ` [dpdk-dev] [PATCH v7 9/9] doc: add packed " Marvin Liu
2020-04-23 12:30 ` [dpdk-dev] [PATCH v8 0/9] add packed ring " Marvin Liu
2020-04-23 12:30 ` [dpdk-dev] [PATCH v8 1/9] net/virtio: add Rx free threshold setting Marvin Liu
2020-04-23 8:09 ` Maxime Coquelin
2020-04-23 12:30 ` [dpdk-dev] [PATCH v8 2/9] net/virtio: enable vectorized path Marvin Liu
2020-04-23 8:33 ` Maxime Coquelin
2020-04-23 8:46 ` Liu, Yong
2020-04-23 8:49 ` Maxime Coquelin
2020-04-23 9:59 ` Liu, Yong
2020-04-23 12:31 ` [dpdk-dev] [PATCH v8 3/9] net/virtio: inorder should depend on feature bit Marvin Liu
2020-04-23 8:46 ` Maxime Coquelin
2020-04-23 12:31 ` [dpdk-dev] [PATCH v8 4/9] net/virtio-user: add vectorized path parameter Marvin Liu
2020-04-23 12:31 ` [dpdk-dev] [PATCH v8 5/9] net/virtio: add vectorized packed ring Rx path Marvin Liu
2020-04-23 12:31 ` [dpdk-dev] [PATCH v8 6/9] net/virtio: reuse packed ring xmit functions Marvin Liu
2020-04-23 12:31 ` [dpdk-dev] [PATCH v8 7/9] net/virtio: add vectorized packed ring Tx path Marvin Liu
2020-04-23 12:31 ` [dpdk-dev] [PATCH v8 8/9] net/virtio: add election for vectorized path Marvin Liu
2020-04-23 12:31 ` [dpdk-dev] [PATCH v8 9/9] doc: add packed " Marvin Liu
2020-04-23 15:17 ` [dpdk-dev] [PATCH v8 0/9] add packed ring " Wang, Yinan
2020-04-24 9:24 ` [dpdk-dev] [PATCH v9 " Marvin Liu
2020-04-24 9:24 ` [dpdk-dev] [PATCH v9 1/9] net/virtio: add Rx free threshold setting Marvin Liu
2020-04-24 9:24 ` [dpdk-dev] [PATCH v9 2/9] net/virtio: inorder should depend on feature bit Marvin Liu
2020-04-24 9:24 ` [dpdk-dev] [PATCH v9 3/9] net/virtio: add vectorized devarg Marvin Liu
2020-04-24 11:27 ` Maxime Coquelin
2020-04-24 9:24 ` [dpdk-dev] [PATCH v9 4/9] net/virtio-user: " Marvin Liu
2020-04-24 11:29 ` Maxime Coquelin
2020-04-24 9:24 ` [dpdk-dev] [PATCH v9 5/9] net/virtio: add vectorized packed ring Rx path Marvin Liu
2020-04-24 11:51 ` Maxime Coquelin
2020-04-24 13:12 ` Liu, Yong
2020-04-24 13:33 ` Maxime Coquelin
2020-04-24 13:40 ` Liu, Yong
2020-04-24 15:58 ` Liu, Yong
2020-04-24 9:24 ` [dpdk-dev] [PATCH v9 6/9] net/virtio: reuse packed ring xmit functions Marvin Liu
2020-04-24 12:01 ` Maxime Coquelin
2020-04-24 9:24 ` [dpdk-dev] [PATCH v9 7/9] net/virtio: add vectorized packed ring Tx path Marvin Liu
2020-04-24 12:29 ` Maxime Coquelin
2020-04-24 13:33 ` Liu, Yong
2020-04-24 13:35 ` Maxime Coquelin
2020-04-24 13:47 ` Liu, Yong
2020-04-24 9:24 ` [dpdk-dev] [PATCH v9 8/9] net/virtio: add election for vectorized path Marvin Liu
2020-04-24 13:26 ` Maxime Coquelin
2020-04-24 9:24 ` [dpdk-dev] [PATCH v9 9/9] doc: add packed " Marvin Liu
2020-04-24 13:31 ` Maxime Coquelin
2020-04-26 2:19 ` [dpdk-dev] [PATCH v9 0/9] add packed ring " Marvin Liu
2020-04-26 2:19 ` [dpdk-dev] [PATCH v10 1/9] net/virtio: add Rx free threshold setting Marvin Liu
2020-04-26 2:19 ` [dpdk-dev] [PATCH v10 2/9] net/virtio: inorder should depend on feature bit Marvin Liu
2020-04-26 2:19 ` [dpdk-dev] [PATCH v10 3/9] net/virtio: add vectorized devarg Marvin Liu
2020-04-27 11:12 ` Maxime Coquelin
2020-04-26 2:19 ` [dpdk-dev] [PATCH v10 4/9] net/virtio-user: " Marvin Liu
2020-04-27 11:07 ` Maxime Coquelin
2020-04-28 1:29 ` Liu, Yong
2020-04-26 2:19 ` [dpdk-dev] [PATCH v10 5/9] net/virtio: reuse packed ring functions Marvin Liu
2020-04-27 11:08 ` Maxime Coquelin
2020-04-26 2:19 ` [dpdk-dev] [PATCH v10 6/9] net/virtio: add vectorized packed ring Rx path Marvin Liu
2020-04-27 11:20 ` Maxime Coquelin [this message]
2020-04-28 1:14 ` Liu, Yong
2020-04-28 8:44 ` Maxime Coquelin
2020-04-28 13:01 ` Liu, Yong
2020-04-28 13:46 ` Maxime Coquelin
2020-04-28 14:43 ` Liu, Yong
2020-04-28 14:50 ` Maxime Coquelin
2020-04-28 15:35 ` Liu, Yong
2020-04-28 15:40 ` Maxime Coquelin
2020-04-28 15:55 ` Liu, Yong
2020-04-28 17:01 ` Liu, Yong
2020-04-26 2:19 ` [dpdk-dev] [PATCH v10 7/9] net/virtio: add vectorized packed ring Tx path Marvin Liu
2020-04-27 11:55 ` Maxime Coquelin
2020-04-26 2:19 ` [dpdk-dev] [PATCH v10 8/9] net/virtio: add election for vectorized path Marvin Liu
2020-04-26 2:19 ` [dpdk-dev] [PATCH v10 9/9] doc: add packed " Marvin Liu
2020-04-28 8:32 ` [dpdk-dev] [PATCH v11 0/9] add packed ring " Marvin Liu
2020-04-28 8:32 ` [dpdk-dev] [PATCH v11 1/9] net/virtio: add Rx free threshold setting Marvin Liu
2020-04-28 8:32 ` [dpdk-dev] [PATCH v11 2/9] net/virtio: inorder should depend on feature bit Marvin Liu
2020-04-28 8:32 ` [dpdk-dev] [PATCH v11 3/9] net/virtio: add vectorized devarg Marvin Liu
2020-04-28 8:32 ` [dpdk-dev] [PATCH v11 4/9] net/virtio-user: " Marvin Liu
2020-04-28 8:32 ` [dpdk-dev] [PATCH v11 5/9] net/virtio: reuse packed ring functions Marvin Liu
2020-04-28 8:32 ` [dpdk-dev] [PATCH v11 6/9] net/virtio: add vectorized packed ring Rx path Marvin Liu
2020-04-30 9:48 ` Ferruh Yigit
2020-04-30 10:23 ` Bruce Richardson
2020-04-30 13:04 ` Ferruh Yigit
2020-04-28 8:32 ` [dpdk-dev] [PATCH v11 7/9] net/virtio: add vectorized packed ring Tx path Marvin Liu
2020-04-28 8:32 ` [dpdk-dev] [PATCH v11 8/9] net/virtio: add election for vectorized path Marvin Liu
2020-04-28 8:32 ` [dpdk-dev] [PATCH v11 9/9] doc: add packed " Marvin Liu
2020-04-29 7:28 ` [dpdk-dev] [PATCH v12 0/9] add packed ring " Marvin Liu
2020-04-29 7:28 ` [dpdk-dev] [PATCH v12 1/9] net/virtio: add Rx free threshold setting Marvin Liu
2020-04-29 7:28 ` [dpdk-dev] [PATCH v12 2/9] net/virtio: inorder should depend on feature bit Marvin Liu
2020-04-29 7:28 ` [dpdk-dev] [PATCH v12 3/9] net/virtio: add vectorized devarg Marvin Liu
2020-04-29 7:28 ` [dpdk-dev] [PATCH v12 4/9] net/virtio-user: " Marvin Liu
2020-04-29 7:28 ` [dpdk-dev] [PATCH v12 5/9] net/virtio: reuse packed ring functions Marvin Liu
2020-04-29 7:28 ` [dpdk-dev] [PATCH v12 6/9] net/virtio: add vectorized packed ring Rx path Marvin Liu
2020-04-29 7:28 ` [dpdk-dev] [PATCH v12 7/9] net/virtio: add vectorized packed ring Tx path Marvin Liu
2020-04-29 7:28 ` [dpdk-dev] [PATCH v12 8/9] net/virtio: add election for vectorized path Marvin Liu
2020-04-29 7:28 ` [dpdk-dev] [PATCH v12 9/9] doc: add packed " Marvin Liu
2020-04-29 8:17 ` [dpdk-dev] [PATCH v12 0/9] add packed ring " Maxime Coquelin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=672a584a-46d1-c78b-7b21-9ed7bc060814@redhat.com \
--to=maxime.coquelin@redhat.com \
--cc=dev@dpdk.org \
--cc=xiaolong.ye@intel.com \
--cc=yong.liu@intel.com \
--cc=zhihong.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).