DPDK patches and discussions
* [PATCH 0/2] vhost: add port mirroring function in the vhost lib
@ 2023-04-21  1:09 Cheng Jiang
  2023-04-21  1:09 ` [PATCH 1/2] vhost: add ingress API for port mirroring datapath Cheng Jiang
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Cheng Jiang @ 2023-04-21  1:09 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, jiayu.hu, xuan.ding, wenwux.ma, yuanx.wang, xingguang.he,
	Cheng Jiang

Similar to the port mirroring function on a switch or router, this
patch set implements such a function in the vhost library. When data
is sent to a front-end, it is also sent to that front-end's mirror.
When data is received from a front-end, it is also sent to the mirror
front-end.
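
A minimal usage sketch is given below, assuming the APIs introduced by
this series and a DMA vchannel already configured for vhost async use
(e.g. via rte_vhost_async_dma_configure()). The NIC port, queue
indexes, mempool and burst size are illustrative, and error handling
for partially submitted or unsent bursts is omitted.

#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_vhost_async.h>

#define BURST_SZ 32

/* Hypothetical polling loop: vid is the primary front-end, mr_vid its
 * mirror; dma_id/vchan_id identify the DMA vchannel doing the copies. */
static void
mirror_poll(uint16_t nic_port, int vid, int mr_vid,
		struct rte_mempool *pool, int16_t dma_id, uint16_t vchan_id)
{
	struct rte_mbuf *pkts[BURST_SZ];
	int nr_inflight;
	uint16_t nb;

	/* NIC -> primary front-end, mirrored to mr_vid (ingress side). */
	nb = rte_eth_rx_burst(nic_port, 0, pkts, BURST_SZ);
	rte_vhost_submit_ingress_mirroring_burst(vid, 0, mr_vid, 0,
			pkts, nb, dma_id, vchan_id);
	nb = rte_vhost_poll_ingress_completed(vid, 0, mr_vid, 0,
			pkts, BURST_SZ, dma_id, vchan_id);
	rte_pktmbuf_free_bulk(pkts, nb);

	/* Primary front-end -> NIC, mirrored to mr_vid (egress side). */
	nb = rte_vhost_async_try_egress_burst(vid, 1, mr_vid, 0,
			pool, pkts, BURST_SZ, &nr_inflight, dma_id, vchan_id);
	rte_eth_tx_burst(nic_port, 0, pkts, nb);
}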

Cheng Jiang (2):
  vhost: add ingress API for port mirroring datapath
  vhost: add egress API for port mirroring datapath

 lib/vhost/rte_vhost_async.h |   17 +
 lib/vhost/version.map       |    3 +
 lib/vhost/virtio_net.c      | 1266 +++++++++++++++++++++++++++++++++++
 3 files changed, 1286 insertions(+)

--
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] vhost: add ingress API for port mirroring datapath
  2023-04-21  1:09 [PATCH 0/2] vhost: add port mirroring function in the vhost lib Cheng Jiang
@ 2023-04-21  1:09 ` Cheng Jiang
  2023-04-21  1:09 ` [PATCH 2/2] vhost: add egress " Cheng Jiang
  2023-05-03  9:36 ` [PATCH 0/2] vhost: add port mirroring function in the vhost lib Maxime Coquelin
  2 siblings, 0 replies; 8+ messages in thread
From: Cheng Jiang @ 2023-04-21  1:09 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, jiayu.hu, xuan.ding, wenwux.ma, yuanx.wang, xingguang.he,
	Cheng Jiang

Similar to the port mirroring function on a switch or router, this
patch implements an ingress function in the vhost library. When data
is sent to a front-end, it is also sent to that front-end's mirror.

Signed-off-by: Cheng Jiang <cheng1.jiang@intel.com>
Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
Signed-off-by: Wenwu Ma <wenwux.ma@intel.com>
---
 lib/vhost/rte_vhost_async.h |   6 +
 lib/vhost/version.map       |   1 +
 lib/vhost/virtio_net.c      | 688 ++++++++++++++++++++++++++++++++++++
 3 files changed, 695 insertions(+)
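
For illustration, below is a minimal sketch (not part of the patch) of
how an application might drain this path before stopping a port, using
the nr_inflight output parameter. The queue indexes, burst size and
DMA vchannel are assumptions:

#include <rte_mbuf.h>
#include <rte_vhost_async.h>

/* Hypothetical teardown helper: keep polling the mirrored egress path
 * until no DMA copies remain in flight, freeing whatever completes. */
static void
drain_mirror_egress(int vid, int mr_vid, struct rte_mempool *pool,
		int16_t dma_id, uint16_t vchan_id)
{
	struct rte_mbuf *pkts[32];
	int nr_inflight = 0;
	uint16_t nb;

	do {
		nb = rte_vhost_async_try_egress_burst(vid, 1, mr_vid, 0,
				pool, pkts, 32, &nr_inflight, dma_id, vchan_id);
		rte_pktmbuf_free_bulk(pkts, nb);
	} while (nr_inflight > 0);
}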

diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 8f190dd44b..30aaf66b60 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -286,6 +286,12 @@ __rte_experimental
 int
 rte_vhost_async_dma_unconfigure(int16_t dma_id, uint16_t vchan_id);
 
+__rte_experimental
+uint16_t rte_vhost_async_try_egress_burst(int vid, uint16_t queue_id,
+		int mr_vid, uint16_t mr_queue_id,
+		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
+		int *nr_inflight, int16_t dma_id, uint16_t vchan_id);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index d322a4a888..95f75a6928 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -98,6 +98,7 @@ EXPERIMENTAL {
 	# added in 22.11
 	rte_vhost_async_dma_unconfigure;
 	rte_vhost_vring_call_nonblock;
+	rte_vhost_async_try_egress_burst;
 };
 
 INTERNAL {
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index be28ea5151..c7e99d403e 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -4262,3 +4262,691 @@ rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
 
 	return count;
 }
+
+
+static __rte_always_inline uint16_t
+async_poll_egress_completed_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		struct virtio_net *mr_dev, struct vhost_virtqueue *mr_vq,
+		struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
+		uint16_t vchan_id, bool legacy_ol_flags)
+{
+	uint16_t nr_cpl_pkts = 0;
+	uint16_t start_idx, from, i;
+	struct async_inflight_info *pkts_info = vq->async->pkts_info;
+	uint16_t n_descs = 0;
+
+	vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE);
+
+	start_idx = async_get_first_inflight_pkt_idx(vq);
+
+	from = start_idx;
+	while (vq->async->pkts_cmpl_flag[from] && count--) {
+		vq->async->pkts_cmpl_flag[from] = false;
+		from = (from + 1) % vq->size;
+		nr_cpl_pkts++;
+	}
+
+	if (nr_cpl_pkts == 0)
+		return 0;
+
+	for (i = 0; i < nr_cpl_pkts; i++) {
+		from = (start_idx + i) % vq->size;
+		n_descs += pkts_info[from].descs;
+		pkts[i] = pkts_info[from].mbuf;
+
+		if (virtio_net_with_host_offload(dev))
+			vhost_dequeue_offload(dev, &pkts_info[from].nethdr, pkts[i],
+					legacy_ol_flags);
+	}
+
+	/* write back completed descs to used ring and update used idx */
+	write_back_completed_descs_split(vq, nr_cpl_pkts);
+	__atomic_add_fetch(&vq->used->idx, nr_cpl_pkts, __ATOMIC_RELEASE);
+	vhost_vring_call_split(dev, vq);
+
+	write_back_completed_descs_split(mr_vq, n_descs);
+	__atomic_add_fetch(&mr_vq->used->idx, n_descs, __ATOMIC_RELEASE);
+	vhost_vring_call_split(mr_dev, mr_vq);
+
+	vq->async->pkts_inflight_n -= nr_cpl_pkts;
+
+	return nr_cpl_pkts;
+}
+
+static __rte_always_inline int
+egress_async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, uint64_t buf_iova,
+		struct virtio_net *mr_dev, uint64_t mr_buf_iova,
+		struct rte_mbuf *m, uint32_t mbuf_offset, uint32_t cpy_len)
+{
+	struct vhost_async *async = vq->async;
+	uint64_t mapped_len, mr_mapped_len;
+	uint32_t buf_offset = 0;
+	void *src, *dst, *mr_dst;
+	void *host_iova, *mr_host_iova;
+
+	while (cpy_len) {
+		host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev,
+					buf_iova + buf_offset, cpy_len,	&mapped_len);
+		if (unlikely(!host_iova)) {
+			VHOST_LOG_DATA(dev->ifname, ERR, "%s: failed to get host iova.\n", __func__);
+			return -1;
+		}
+
+		mr_host_iova = (void *)(uintptr_t)gpa_to_first_hpa(mr_dev,
+					mr_buf_iova + buf_offset, cpy_len, &mr_mapped_len);
+		if (unlikely(!mr_host_iova)) {
+			VHOST_LOG_DATA(mr_dev->ifname, ERR, "%s: failed to get mirror hpa.\n", __func__);
+			return -1;
+		}
+
+		if (unlikely(mr_mapped_len != mapped_len)) {
+			VHOST_LOG_DATA(dev->ifname, ERR, "original mapped len is not equal to mirror len\n");
+			return -1;
+		}
+
+		src = host_iova;
+		dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);
+
+		mr_dst = mr_host_iova;
+
+		if (unlikely(async_iter_add_iovec(dev, async, src, dst, (size_t)mapped_len)))
+			return -1;
+
+		if (unlikely(async_iter_add_iovec(dev, async, src, mr_dst, (size_t)mapped_len)))
+			return -1;
+
+		cpy_len -= (uint32_t)mapped_len;
+		mbuf_offset += (uint32_t)mapped_len;
+		buf_offset += (uint32_t)mapped_len;
+	}
+
+	return 0;
+}
+
+static __rte_always_inline int
+egress_async_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		  struct buf_vector *buf_vec, uint16_t nr_vec,
+		  struct virtio_net *mr_dev, struct vhost_virtqueue *mr_vq,
+		  struct buf_vector *mr_buf_vec, uint16_t mr_nr_vec, uint16_t mr_num_buffers,
+		  struct rte_mbuf *m, struct rte_mempool *mbuf_pool,
+		  uint16_t slot_idx)
+{
+	uint64_t buf_iova;
+	uint32_t buf_avail, buf_offset, buf_len;
+	uint32_t mbuf_avail, mbuf_offset;
+	uint32_t hdr_remain = dev->vhost_hlen;
+	uint32_t cpy_len;
+	uint16_t vec_idx, mr_vec_idx = 0;
+	struct rte_mbuf *cur = m;
+	struct rte_mbuf *prev = m;
+	struct virtio_net_hdr tmp_hdr;
+	struct virtio_net_hdr *hdr = NULL;
+
+	uint64_t mr_buf_addr, mr_buf_iova;
+	uint32_t mr_buf_avail, mr_buf_offset, mr_buf_len;
+	struct virtio_net_hdr_mrg_rxbuf mr_tmp_hdr;
+	struct virtio_net_hdr_mrg_rxbuf *mr_hdr = NULL;
+	uint64_t mr_hdr_addr;
+
+	struct vhost_async *async = vq->async;
+	struct async_inflight_info *pkts_info;
+
+	/* original desc to mbuf */
+
+	/*
+	 * The caller has checked the descriptors chain is larger than the
+	 * header size.
+	 */
+	if (virtio_net_with_host_offload(dev)) {
+		if (unlikely(buf_vec[0].buf_len < sizeof(struct virtio_net_hdr))) {
+			/*
+			 * No luck, the virtio-net header doesn't fit
+			 * in a contiguous virtual area.
+			 */
+			copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
+			hdr = &tmp_hdr;
+		} else {
+			hdr = (struct virtio_net_hdr *)((uintptr_t)buf_vec[0].buf_addr);
+		}
+	}
+
+	for (vec_idx = 0; vec_idx < nr_vec; vec_idx++) {
+		if (buf_vec[vec_idx].buf_len > hdr_remain)
+			break;
+
+		hdr_remain -= buf_vec[vec_idx].buf_len;
+	}
+
+	buf_len = buf_vec[vec_idx].buf_len;
+	buf_iova = buf_vec[vec_idx].buf_iova;
+	buf_offset = hdr_remain;
+	buf_avail = buf_vec[vec_idx].buf_len - hdr_remain;
+
+	/* original desc to mirror desc */
+
+	mr_buf_addr = mr_buf_vec[mr_vec_idx].buf_addr;
+	mr_buf_iova = mr_buf_vec[mr_vec_idx].buf_iova;
+	mr_buf_len = mr_buf_vec[mr_vec_idx].buf_len;
+
+	if (unlikely(mr_buf_len < mr_dev->vhost_hlen && mr_nr_vec <= 1))
+		return -1;
+
+	mr_hdr_addr = mr_buf_addr;
+
+	if (unlikely(mr_buf_len < mr_dev->vhost_hlen)) {
+		memset(&mr_tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
+		mr_hdr = &mr_tmp_hdr;
+	} else
+		mr_hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)mr_hdr_addr;
+
+	VHOST_LOG_DATA(mr_dev->ifname, DEBUG, "mirror port RX: num merge buffers %d\n", mr_num_buffers);
+
+	if (unlikely(mr_buf_len < mr_dev->vhost_hlen)) {
+		mr_buf_offset = mr_dev->vhost_hlen - mr_buf_len;
+		mr_vec_idx++;
+		mr_buf_addr = mr_buf_vec[mr_vec_idx].buf_addr;
+		mr_buf_iova = mr_buf_vec[mr_vec_idx].buf_iova;
+		mr_buf_len = mr_buf_vec[mr_vec_idx].buf_len;
+		mr_buf_avail = mr_buf_len - mr_buf_offset;
+	} else {
+		mr_buf_offset = mr_dev->vhost_hlen;
+		mr_buf_avail = mr_buf_len - mr_dev->vhost_hlen;
+	}
+
+	rte_memcpy((void *)&mr_hdr->hdr, (void *)hdr, sizeof(struct virtio_net_hdr));
+	if (rxvq_is_mergeable(mr_dev))
+		ASSIGN_UNLESS_EQUAL(mr_hdr->num_buffers, mr_num_buffers);
+	if (unlikely((uintptr_t)mr_hdr == (uintptr_t)&mr_tmp_hdr))
+		copy_vnet_hdr_to_desc(mr_dev, mr_vq, mr_buf_vec, mr_hdr);
+	else {
+		PRINT_PACKET(mr_dev, (uintptr_t)mr_buf_addr,
+				mr_dev->vhost_hlen, 0);
+		vhost_log_cache_write_iova(mr_dev, mr_vq,
+				mr_buf_vec[0].buf_iova,
+				mr_dev->vhost_hlen);
+	}
+
+	mbuf_offset = 0;
+	mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
+
+	pkts_info = async->pkts_info;
+	if (async_iter_initialize(dev, async))
+		return -1;
+
+	while (1) {
+		cpy_len = RTE_MIN(buf_avail, mbuf_avail);
+		cpy_len = RTE_MIN(cpy_len, mr_buf_avail);
+
+		if (egress_async_fill_seg(dev, vq, buf_iova + buf_offset,
+				mr_dev, mr_buf_iova + mr_buf_offset,
+				m, mbuf_offset, cpy_len) < 0)
+			goto error;
+
+		mbuf_avail -= cpy_len;
+		mbuf_offset += cpy_len;
+		buf_avail -= cpy_len;
+		buf_offset += cpy_len;
+		mr_buf_avail -= cpy_len;
+		mr_buf_offset += cpy_len;
+
+		/* This buf reaches to its end, get the next one */
+		if (buf_avail == 0) {
+			if (++vec_idx >= nr_vec)
+				break;
+
+			buf_iova = buf_vec[vec_idx].buf_iova;
+			buf_len = buf_vec[vec_idx].buf_len;
+
+			buf_offset = 0;
+			buf_avail = buf_len;
+		}
+
+		if (mr_buf_avail == 0) {
+			mr_vec_idx++;
+			if (unlikely(mr_vec_idx >= mr_nr_vec))
+				goto error;
+
+			mr_buf_addr = mr_buf_vec[mr_vec_idx].buf_addr;
+			mr_buf_iova = mr_buf_vec[mr_vec_idx].buf_iova;
+			mr_buf_len = mr_buf_vec[mr_vec_idx].buf_len;
+
+			mr_buf_offset = 0;
+			mr_buf_avail = mr_buf_len;
+		}
+
+		/*
+		 * This mbuf reaches to its end, get a new one
+		 * to hold more data.
+		 */
+		if (mbuf_avail == 0) {
+			cur = rte_pktmbuf_alloc(mbuf_pool);
+			if (unlikely(cur == NULL)) {
+				VHOST_LOG_DATA(dev->ifname, ERR, "Failed to allocate memory for mbuf.\n");
+				goto error;
+			}
+
+			prev->next = cur;
+			prev->data_len = mbuf_offset;
+			m->nb_segs += 1;
+			m->pkt_len += mbuf_offset;
+			prev = cur;
+
+			mbuf_offset = 0;
+			mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
+		}
+	}
+
+	prev->data_len = mbuf_offset;
+	m->pkt_len += mbuf_offset;
+
+	async_iter_finalize(async);
+	if (hdr)
+		pkts_info[slot_idx].nethdr = *hdr;
+
+	return 0;
+
+error:
+	async_iter_cancel(async);
+
+	return -1;
+}
+
+static __rte_always_inline uint16_t
+virtio_mirror_dev_tx_async_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		struct virtio_net *mr_dev, struct vhost_virtqueue *mr_vq,
+		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
+		uint16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags)
+{
+	static bool allocerr_warned;
+	bool dropped = false;
+	uint16_t avail_entries;
+	uint16_t pkt_idx, slot_idx = 0;
+	uint16_t nr_done_pkts = 0;
+	uint16_t pkt_err = 0;
+	uint16_t n_xfer;
+	struct vhost_async *async = vq->async;
+	struct vhost_async *mr_async = mr_vq->async;
+	struct async_inflight_info *pkts_info = async->pkts_info;
+	struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST];
+	struct buf_vector mr_buf_vec[BUF_VECTOR_MAX];
+	uint16_t mr_num_buffers;
+	uint16_t mr_avail_head;
+	uint16_t pkts_size = count;
+
+	/**
+	 * The ordering between avail index and
+	 * desc reads needs to be enforced.
+	 */
+	avail_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - vq->last_avail_idx;
+	if (avail_entries == 0)
+		goto out;
+
+	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
+
+	mr_avail_head = __atomic_load_n(&mr_vq->avail->idx, __ATOMIC_ACQUIRE);
+	rte_prefetch0(&mr_vq->avail->ring[mr_vq->last_avail_idx & (mr_vq->size - 1)]);
+
+	async_iter_reset(async);
+
+	count = RTE_MIN(count, MAX_PKT_BURST);
+	count = RTE_MIN(count, avail_entries);
+	VHOST_LOG_DATA(dev->ifname, DEBUG, "about to dequeue %u buffers\n", count);
+
+	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count))
+		goto out;
+
+	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
+		uint16_t head_idx = 0;
+		uint16_t nr_vec = 0;
+		uint16_t to;
+		uint32_t buf_len;
+		int err;
+		struct buf_vector buf_vec[BUF_VECTOR_MAX];
+		struct rte_mbuf *pkt = pkts_prealloc[pkt_idx];
+		uint16_t mr_nr_vec = 0;
+
+		if (unlikely(fill_vec_buf_split(dev, vq, vq->last_avail_idx,
+						&nr_vec, buf_vec,
+						&head_idx, &buf_len,
+						VHOST_ACCESS_RO) < 0)) {
+			dropped = true;
+			break;
+		}
+
+		if (unlikely(buf_len <= dev->vhost_hlen)) {
+			dropped = true;
+			break;
+		}
+
+		buf_len -= dev->vhost_hlen;
+
+		err = virtio_dev_pktmbuf_prep(dev, pkt, buf_len);
+		if (unlikely(err)) {
+			/**
+			 * mbuf allocation fails for jumbo packets when external
+			 * buffer allocation is not allowed and linear buffer
+			 * is required. Drop this packet.
+			 */
+			if (!allocerr_warned) {
+				VHOST_LOG_DATA(dev->ifname, ERR,
+					"Failed mbuf alloc of size %d from %s.\n",
+					buf_len, mbuf_pool->name);
+				allocerr_warned = true;
+			}
+			dropped = true;
+			break;
+		}
+
+		if (unlikely(reserve_avail_buf_split(mr_dev, mr_vq,
+						buf_len, mr_buf_vec, &mr_num_buffers,
+						mr_avail_head, &mr_nr_vec) < 0)) {
+			VHOST_LOG_DATA(mr_dev->ifname, DEBUG, "(%d) failed to get enough desc from mirror vring\n", mr_dev->vid);
+			mr_vq->shadow_used_idx -= mr_num_buffers;
+			dropped = true;
+			break;
+		}
+
+		VHOST_LOG_DATA(mr_dev->ifname, DEBUG,
+			"mirror port current index %d | end index %d\n",
+			mr_vq->last_avail_idx, mr_vq->last_avail_idx + mr_num_buffers);
+
+		slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1);
+		err = egress_async_desc_to_mbuf(dev, vq, buf_vec, nr_vec,
+				mr_dev, mr_vq, mr_buf_vec, mr_nr_vec, mr_num_buffers,
+				pkt, mbuf_pool, slot_idx);
+		if (unlikely(err)) {
+			if (!allocerr_warned) {
+				VHOST_LOG_DATA(dev->ifname, ERR, "Failed to offload copies to async channel.\n");
+				allocerr_warned = true;
+			}
+			vq->shadow_used_idx -= mr_num_buffers;
+			dropped = true;
+			break;
+		}
+
+		pkts_info[slot_idx].mbuf = pkt;
+		pkts_info[slot_idx].descs = mr_num_buffers;
+
+		/* store used descs */
+		to = async->desc_idx_split & (vq->size - 1);
+		async->descs_split[to].id = head_idx;
+		async->descs_split[to].len = 0;
+		async->desc_idx_split++;
+
+		vq->last_avail_idx++;
+		mr_vq->last_avail_idx += mr_num_buffers;
+	}
+
+	if (unlikely(dropped))
+		rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx);
+
+	n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx,
+					async->iov_iter, pkt_idx);
+	async->pkts_inflight_n += n_xfer;
+
+	pkt_err = pkt_idx - n_xfer;
+	if (unlikely(pkt_err)) {
+		uint16_t num_descs = 0;
+
+		VHOST_LOG_DATA(dev->ifname, DEBUG,
+			"%s: failed to transfer %u packets for queue %u.\n",
+			__func__, pkt_err, vq->index);
+
+		/* update number of completed packets */
+		pkt_idx = n_xfer;
+
+		/* recover original vq available ring */
+		vq->last_avail_idx -= pkt_err;
+
+		/**
+		 * recover async channel copy related structures.
+		 */
+		async->desc_idx_split -= pkt_err;
+
+		/**
+		 * calculate the sum of descriptors to revert and free pktmbufs
+		 * for error pkts.
+		 */
+		while (pkt_err-- > 0) {
+			rte_pktmbuf_free(pkts_info[slot_idx & (vq->size - 1)].mbuf);
+			num_descs += pkts_info[slot_idx & (vq->size - 1)].descs;
+			slot_idx--;
+		}
+
+		/* recover mirror vq shadow used ring and available ring */
+		mr_vq->shadow_used_idx -= num_descs;
+		mr_vq->last_avail_idx -= num_descs;
+	}
+
+	async->pkts_idx += pkt_idx;
+	if (async->pkts_idx >= vq->size)
+		async->pkts_idx -= vq->size;
+
+	/* keep used descriptors */
+	if (likely(mr_vq->shadow_used_idx)) {
+		uint16_t to = mr_async->desc_idx_split & (mr_vq->size - 1);
+
+		store_dma_desc_info_split(mr_vq->shadow_used_split,
+				mr_async->descs_split, mr_vq->size, 0, to,
+				mr_vq->shadow_used_idx);
+
+		mr_async->desc_idx_split += mr_vq->shadow_used_idx;
+
+		mr_async->pkts_idx += pkt_idx;
+		if (mr_async->pkts_idx >= mr_vq->size)
+			mr_async->pkts_idx -= pkt_idx;
+
+		mr_async->pkts_inflight_n += pkt_idx;
+		mr_vq->shadow_used_idx = 0;
+	}
+
+out:
+	/* DMA device may serve other queues, unconditionally check completed. */
+	nr_done_pkts = async_poll_egress_completed_split(dev, vq, mr_dev, mr_vq,
+							pkts, pkts_size,
+							dma_id, vchan_id, legacy_ol_flags);
+
+	return nr_done_pkts;
+}
+
+__rte_noinline
+static uint16_t
+virtio_mirror_dev_tx_async_split_legacy(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		struct virtio_net *mr_dev, struct vhost_virtqueue *mr_vq,
+		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
+		int16_t dma_id, uint16_t vchan_id)
+{
+	return virtio_mirror_dev_tx_async_split(dev, vq, mr_dev, mr_vq,
+				mbuf_pool, pkts, count, dma_id, vchan_id, true);
+}
+
+__rte_noinline
+static uint16_t
+virtio_mirror_dev_tx_async_split_compliant(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		struct virtio_net *mr_dev, struct vhost_virtqueue *mr_vq,
+		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
+		int16_t dma_id, uint16_t vchan_id)
+{
+	return virtio_mirror_dev_tx_async_split(dev, vq, mr_dev, mr_vq,
+				mbuf_pool, pkts, count, dma_id, vchan_id, false);
+}
+
+uint16_t
+rte_vhost_async_try_egress_burst(int vid, uint16_t queue_id,
+		int mr_vid, uint16_t mr_queue_id,
+		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
+		int *nr_inflight, int16_t dma_id, uint16_t vchan_id)
+{
+	struct virtio_net *dev, *mr_dev;
+	struct rte_mbuf *rarp_mbuf = NULL;
+	struct vhost_virtqueue *vq, *mr_vq;
+	int16_t success = 1;
+
+	dev = get_device(vid);
+	mr_dev = get_device(mr_vid);
+	if (!dev || !mr_dev || !nr_inflight)
+		return 0;
+
+	*nr_inflight = -1;
+
+	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
+		VHOST_LOG_DATA(dev->ifname, ERR,
+			"(%d) %s: built-in vhost net backend is disabled.\n",
+			dev->vid, __func__);
+		return 0;
+	}
+
+	if (unlikely(!(mr_dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
+		VHOST_LOG_DATA(mr_dev->ifname, ERR,
+			"(%d) %s: built-in vhost net backend is disabled.\n",
+			mr_dev->vid, __func__);
+		return 0;
+	}
+
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
+		VHOST_LOG_DATA(dev->ifname, ERR,
+			"(%d) %s: invalid virtqueue idx %d.\n",
+			dev->vid, __func__, queue_id);
+		return 0;
+	}
+
+	if (unlikely(!is_valid_virt_queue_idx(mr_queue_id, 0, mr_dev->nr_vring))) {
+		VHOST_LOG_DATA(mr_dev->ifname, ERR,
+			"(%d) %s: invalid virtqueue idx %d.\n",
+			mr_dev->vid, __func__, mr_queue_id);
+		return 0;
+	}
+
+	if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) {
+		VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n",
+			__func__, dma_id);
+		return 0;
+	}
+
+	if (unlikely(!dma_copy_track[dma_id].vchans ||
+				!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
+		VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid channel %d:%u.\n",
+			__func__, dma_id, vchan_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+	mr_vq = mr_dev->virtqueue[mr_queue_id];
+
+	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0) ||
+		unlikely(rte_spinlock_trylock(&mr_vq->access_lock) == 0))
+		return 0;
+
+	if (unlikely(vq->enabled == 0) || unlikely(mr_vq->enabled == 0)) {
+		count = 0;
+		goto out_access_unlock;
+	}
+
+	if (unlikely(vq->async == NULL)) {
+		VHOST_LOG_DATA(dev->ifname, ERR, "%s: async not registered for queue id %d.\n",
+			__func__, queue_id);
+		count = 0;
+		goto out_access_unlock;
+	}
+
+	if (unlikely(mr_vq->async == NULL)) {
+		VHOST_LOG_DATA(mr_dev->ifname, ERR, "%s: async not registered for queue id %d.\n",
+			__func__, mr_queue_id);
+		count = 0;
+		goto out_access_unlock;
+	}
+
+
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(vq);
+	if (mr_dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(mr_vq);
+
+	if (unlikely(vq->access_ok == 0))
+		if (unlikely(vring_translate(dev, vq) < 0)) {
+			count = 0;
+			goto out;
+		}
+
+	if (unlikely(mr_vq->access_ok == 0))
+		if (unlikely(vring_translate(mr_dev, mr_vq) < 0)) {
+			count = 0;
+			goto out;
+		}
+
+	/*
+	 * Construct a RARP broadcast packet, and inject it to the "pkts"
+	 * array, to looks like that guest actually send such packet.
+	 *
+	 * Check user_send_rarp() for more information.
+	 *
+	 * broadcast_rarp shares a cacheline in the virtio_net structure
+	 * with some fields that are accessed during enqueue and
+	 * __atomic_compare_exchange_n causes a write if performed compare
+	 * and exchange. This could result in false sharing between enqueue
+	 * and dequeue.
+	 *
+	 * Prevent unnecessary false sharing by reading broadcast_rarp first
+	 * and only performing compare and exchange if the read indicates it
+	 * is likely to be set.
+	 */
+	if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
+			__atomic_compare_exchange_n(&dev->broadcast_rarp,
+			&success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {
+
+		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
+		if (rarp_mbuf == NULL) {
+			VHOST_LOG_DATA(dev->ifname, ERR, "Failed to make RARP packet.\n");
+			count = 0;
+			goto out;
+		}
+		/*
+		 * Inject it to the head of "pkts" array, so that switch's mac
+		 * learning table will get updated first.
+		 */
+		pkts[0] = rarp_mbuf;
+		vhost_queue_stats_update(dev, vq, pkts, 1);
+		pkts++;
+		count -= 1;
+	}
+
+	if (unlikely(vq_is_packed(dev))) {
+		VHOST_LOG_DATA(dev->ifname, ERR,
+			"(%d) %s: port does not support packed ring.\n",
+			dev->vid, __func__);
+		return 0;
+	}
+
+	if (unlikely(vq_is_packed(mr_dev))) {
+		VHOST_LOG_DATA(mr_dev->ifname, ERR,
+			"(%d) %s: mirror port does not support packed ring.\n",
+			mr_dev->vid, __func__);
+		return 0;
+	}
+
+	if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
+		count = virtio_mirror_dev_tx_async_split_legacy(dev, vq, mr_dev, mr_vq,
+				mbuf_pool, pkts, count, dma_id, vchan_id);
+	else
+		count = virtio_mirror_dev_tx_async_split_compliant(dev, vq, mr_dev, mr_vq,
+				mbuf_pool, pkts, count, dma_id, vchan_id);
+
+	*nr_inflight = vq->async->pkts_inflight_n;
+	vhost_queue_stats_update(dev, vq, pkts, count);
+	vhost_queue_stats_update(mr_dev, mr_vq, pkts, count);
+
+out:
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_unlock(vq);
+	if (mr_dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_unlock(mr_vq);
+
+out_access_unlock:
+	rte_spinlock_unlock(&vq->access_lock);
+	rte_spinlock_unlock(&mr_vq->access_lock);
+
+	if (unlikely(rarp_mbuf != NULL))
+		count += 1;
+
+	return count;
+}
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/2] vhost: add egress API for port mirroring datapath
  2023-04-21  1:09 [PATCH 0/2] vhost: add port mirroring function in the vhost lib Cheng Jiang
  2023-04-21  1:09 ` [PATCH 1/2] vhost: add ingress API for port mirroring datapath Cheng Jiang
@ 2023-04-21  1:09 ` Cheng Jiang
  2023-05-03  9:36 ` [PATCH 0/2] vhost: add port mirroring function in the vhost lib Maxime Coquelin
  2 siblings, 0 replies; 8+ messages in thread
From: Cheng Jiang @ 2023-04-21  1:09 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, jiayu.hu, xuan.ding, wenwux.ma, yuanx.wang, xingguang.he,
	Cheng Jiang

This patch implements an egress function in the vhost library. When
packets are received from a front-end, they are also sent to the
mirror front-end.

Signed-off-by: Cheng Jiang <cheng1.jiang@intel.com>
Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
Signed-off-by: Wenwu Ma <wenwux.ma@intel.com>
---
 lib/vhost/rte_vhost_async.h |  11 +
 lib/vhost/version.map       |   2 +
 lib/vhost/virtio_net.c      | 682 +++++++++++++++++++++++++++++++++---
 3 files changed, 643 insertions(+), 52 deletions(-)
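
For illustration, below is a minimal sketch (not part of the patch) of
how the two new ingress calls might be used together, including freeing
a partially accepted burst. The queue indexes, burst size and DMA
vchannel are assumptions:

#include <rte_mbuf.h>
#include <rte_vhost_async.h>

/* Hypothetical ingress helper: enqueue a burst to the primary front-end
 * (vid); the DMA engine also copies the packets into the mirror
 * front-end (mr_vid). Completed mbufs are then reclaimed. */
static void
mirror_ingress_xmit(int vid, int mr_vid, struct rte_mbuf **pkts,
		uint16_t nb_rx, int16_t dma_id, uint16_t vchan_id)
{
	struct rte_mbuf *done[32];
	uint16_t nb_enq, nb_cpl;

	nb_enq = rte_vhost_submit_ingress_mirroring_burst(vid, 0, mr_vid, 0,
			pkts, nb_rx, dma_id, vchan_id);
	/* Free whatever could not be submitted. */
	if (nb_enq < nb_rx)
		rte_pktmbuf_free_bulk(&pkts[nb_enq], nb_rx - nb_enq);

	/* Reclaim mbufs whose copies to both front-ends have completed. */
	nb_cpl = rte_vhost_poll_ingress_completed(vid, 0, mr_vid, 0,
			done, 32, dma_id, vchan_id);
	rte_pktmbuf_free_bulk(done, nb_cpl);
}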

diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 30aaf66b60..4df473f1ec 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -286,6 +286,17 @@ __rte_experimental
 int
 rte_vhost_async_dma_unconfigure(int16_t dma_id, uint16_t vchan_id);
 
+__rte_experimental
+uint16_t rte_vhost_submit_ingress_mirroring_burst(int vid, uint16_t queue_id,
+		int mirror_vid, uint16_t mirror_queue_id,
+		struct rte_mbuf **pkts, uint16_t count,
+		int16_t dma_id, uint16_t vchan_id);
+
+__rte_experimental
+uint16_t rte_vhost_poll_ingress_completed(int vid, uint16_t queue_id, int mr_vid,
+		uint16_t mr_queue_id, struct rte_mbuf **pkts, uint16_t count,
+		int16_t dma_id, uint16_t vchan_id);
+
 __rte_experimental
 uint16_t rte_vhost_async_try_egress_burst(int vid, uint16_t queue_id,
 		int mr_vid, uint16_t mr_queue_id,
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 95f75a6928..347ea6ac9c 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -98,6 +98,8 @@ EXPERIMENTAL {
 	# added in 22.11
 	rte_vhost_async_dma_unconfigure;
 	rte_vhost_vring_call_nonblock;
+	rte_vhost_submit_ingress_mirroring_burst;
+	rte_vhost_poll_ingress_completed;
 	rte_vhost_async_try_egress_burst;
 };
 
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index c7e99d403e..f4c96c3216 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -4263,6 +4263,634 @@ rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
 	return count;
 }
 
+static __rte_always_inline int
+async_mirror_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, uint64_t buf_iova,
+		struct virtio_net *mr_dev, uint64_t mr_buf_iova,
+		struct rte_mbuf *m, uint32_t mbuf_offset, uint32_t cpy_len, bool is_ingress)
+{
+	struct vhost_async *async = vq->async;
+	uint64_t mapped_len, mr_mapped_len;
+	uint32_t buf_offset = 0;
+	void *src, *dst, *mr_dst;
+	void *host_iova, *mr_host_iova;
+
+	while (cpy_len) {
+		host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev,
+				buf_iova + buf_offset, cpy_len, &mapped_len);
+		if (unlikely(!host_iova)) {
+			VHOST_LOG_DATA(dev->ifname, ERR, "%s: failed to get host iova.\n", __func__);
+			return -1;
+		}
+		mr_host_iova = (void *)(uintptr_t)gpa_to_first_hpa(mr_dev,
+				mr_buf_iova + buf_offset, cpy_len, &mr_mapped_len);
+		if (unlikely(!mr_host_iova)) {
+			VHOST_LOG_DATA(mr_dev->ifname, ERR, "%s: failed to get mirror host iova.\n", __func__);
+			return -1;
+		}
+
+		if (unlikely(mr_mapped_len != mapped_len)) {
+			VHOST_LOG_DATA(dev->ifname, ERR, "original mapped len is not equal to mirror len\n");
+			return -1;
+		}
+
+		if (is_ingress) {
+			src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);
+			dst = host_iova;
+			mr_dst = mr_host_iova;
+		} else {
+			src = host_iova;
+			dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);
+			mr_dst = mr_host_iova;
+		}
+
+		if (unlikely(async_iter_add_iovec(dev, async, src, dst, (size_t)mapped_len)))
+			return -1;
+		if (unlikely(async_iter_add_iovec(mr_dev, async, src, mr_dst, (size_t)mapped_len)))
+			return -1;
+
+		cpy_len -= (uint32_t)mapped_len;
+		mbuf_offset += (uint32_t)mapped_len;
+		buf_offset += (uint32_t)mapped_len;
+	}
+
+	return 0;
+}
+
+static __rte_always_inline uint16_t
+vhost_poll_ingress_completed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			struct virtio_net *mr_dev, struct vhost_virtqueue *mr_vq,
+			struct rte_mbuf **pkts, uint16_t count,	int16_t dma_id, uint16_t vchan_id)
+{
+	struct vhost_async *async = vq->async;
+	struct vhost_async *mr_async = mr_vq->async;
+	struct async_inflight_info *pkts_info = async->pkts_info;
+
+	uint16_t nr_cpl_pkts = 0, n_descs = 0;
+	uint16_t mr_n_descs = 0;
+	uint16_t start_idx, from, i;
+
+	/* Check completed copies for the given DMA vChannel */
+	vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE);
+
+	start_idx = async_get_first_inflight_pkt_idx(vq);
+	/*
+	 * Calculate the number of copy completed packets.
+	 * Note that there may be completed packets even if
+	 * no copies are reported done by the given DMA vChannel,
+	 * as it's possible that a virtqueue uses multiple DMA
+	 * vChannels.
+	 */
+	from = start_idx;
+	while (vq->async->pkts_cmpl_flag[from] && count--) {
+		vq->async->pkts_cmpl_flag[from] = false;
+		from++;
+		if (from >= vq->size)
+			from -= vq->size;
+		nr_cpl_pkts++;
+	}
+
+	if (nr_cpl_pkts == 0)
+		return 0;
+
+	for (i = 0; i < nr_cpl_pkts; i++) {
+		from = (start_idx + i) % vq->size;
+		/* Only used with split ring */
+		n_descs += pkts_info[from].descs;
+		mr_n_descs += pkts_info[from].nr_buffers;
+		pkts[i] = pkts_info[from].mbuf;
+	}
+
+	async->pkts_inflight_n -= nr_cpl_pkts;
+
+	if (likely(vq->enabled && vq->access_ok)) {
+		write_back_completed_descs_split(vq, n_descs);
+		__atomic_add_fetch(&vq->used->idx, n_descs, __ATOMIC_RELEASE);
+		vhost_vring_call_split(dev, vq);
+	} else {
+		async->last_desc_idx_split += n_descs;
+	}
+
+	if (likely(mr_vq->enabled && mr_vq->access_ok)) {
+		write_back_completed_descs_split(mr_vq, mr_n_descs);
+		__atomic_add_fetch(&mr_vq->used->idx, mr_n_descs, __ATOMIC_RELEASE);
+		vhost_vring_call_split(mr_dev, mr_vq);
+	} else {
+		mr_async->last_desc_idx_split += mr_n_descs;
+	}
+
+	return nr_cpl_pkts;
+}
+
+uint16_t
+rte_vhost_poll_ingress_completed(int vid, uint16_t queue_id, int mr_vid, uint16_t mr_queue_id,
+			struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, uint16_t vchan_id)
+{
+	struct virtio_net *dev, *mr_dev;
+	struct vhost_virtqueue *vq, *mr_vq;
+	uint16_t n_pkts_cpl = 0;
+
+	dev = get_device(vid);
+	if (unlikely(!dev))
+		return 0;
+
+	mr_dev = get_device(mr_vid);
+	if (unlikely(!mr_dev))
+		return 0;
+
+	VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__);
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
+		VHOST_LOG_DATA(dev->ifname, ERR,
+			"%s: invalid virtqueue idx %d.\n",
+			__func__, queue_id);
+		return 0;
+	}
+	if (unlikely(!is_valid_virt_queue_idx(mr_queue_id, 0, mr_dev->nr_vring))) {
+		VHOST_LOG_DATA(mr_dev->ifname, ERR,
+			"%s: invalid virtqueue idx %d.\n",
+			__func__, mr_queue_id);
+		return 0;
+	}
+
+	if (unlikely(!dma_copy_track[dma_id].vchans ||
+			!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
+		VHOST_LOG_DATA(dev->ifname, ERR,
+				"%s: invalid channel %d:%u.\n",
+				__func__, dma_id, vchan_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+	mr_vq = mr_dev->virtqueue[mr_queue_id];
+
+	if (!rte_spinlock_trylock(&vq->access_lock)) {
+		VHOST_LOG_DATA(dev->ifname, DEBUG,
+				"%s: virtqueue %u is busy.\n",
+				__func__, queue_id);
+		return 0;
+	}
+
+	if (!rte_spinlock_trylock(&mr_vq->access_lock)) {
+		VHOST_LOG_DATA(mr_dev->ifname, DEBUG,
+				"%s: mirror virtqueue %u is busy.\n",
+				__func__, queue_id);
+		goto out_org_access_unlock;
+	}
+
+	if (unlikely(!vq->async)) {
+		VHOST_LOG_DATA(dev->ifname, ERR,
+				"%s: async not registered for virtqueue %d.\n",
+				__func__, queue_id);
+		goto out;
+	}
+
+	if (unlikely(!mr_vq->async)) {
+		VHOST_LOG_DATA(mr_dev->ifname, ERR,
+				"%s: async not registered for mirror virtqueue %d.\n",
+				__func__, queue_id);
+		goto out;
+	}
+
+	n_pkts_cpl = vhost_poll_ingress_completed(dev, vq, mr_dev, mr_vq,
+					pkts, count, dma_id, vchan_id);
+
+	vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl);
+	vhost_queue_stats_update(mr_dev, mr_vq, pkts, n_pkts_cpl);
+	vq->stats.inflight_completed += n_pkts_cpl;
+
+out:
+	rte_spinlock_unlock(&mr_vq->access_lock);
+out_org_access_unlock:
+	rte_spinlock_unlock(&vq->access_lock);
+
+	return n_pkts_cpl;
+}
+
+static __rte_always_inline int
+ingress_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		struct virtio_net *mr_dev, struct vhost_virtqueue *mr_vq,
+		struct buf_vector *buf_vec, uint16_t nr_vec, uint16_t num_buffers,
+		struct buf_vector *mr_buf_vec, uint16_t mr_nr_vec, uint16_t mr_num_buffers,
+		struct rte_mbuf *m)
+{
+	uint32_t vec_idx = 0;
+	uint32_t mr_vec_idx = 0;
+	uint32_t mbuf_offset, mbuf_avail;
+	uint32_t buf_offset, buf_avail, mr_buf_offset, mr_buf_avail;
+	uint64_t buf_addr, mr_buf_addr, buf_iova, mr_buf_iova;
+	uint32_t cpy_len, buf_len, mr_buf_len;
+	uint64_t hdr_addr, mr_hdr_addr;
+	struct rte_mbuf *hdr_mbuf;
+	struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
+	struct virtio_net_hdr_mrg_rxbuf mr_tmp_hdr, *mr_hdr = NULL;
+	struct vhost_async *async = vq->async;
+
+	if (unlikely(m == NULL))
+		return -1;
+
+	/* handle original port header */
+	buf_addr = buf_vec[vec_idx].buf_addr;
+	buf_iova = buf_vec[vec_idx].buf_iova;
+	buf_len = buf_vec[vec_idx].buf_len;
+
+	if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1))
+		return -1;
+
+	hdr_mbuf = m;
+	hdr_addr = buf_addr;
+	if (unlikely(buf_len < dev->vhost_hlen)) {
+		memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
+		hdr = &tmp_hdr;
+	} else {
+		hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
+	}
+
+	VHOST_LOG_DATA(dev->ifname, DEBUG, "RX: num merge buffers %d\n", num_buffers);
+
+	if (unlikely(buf_len < dev->vhost_hlen)) {
+		buf_offset = dev->vhost_hlen - buf_len;
+		vec_idx++;
+		buf_addr = buf_vec[vec_idx].buf_addr;
+		buf_iova = buf_vec[vec_idx].buf_iova;
+		buf_len = buf_vec[vec_idx].buf_len;
+		buf_avail = buf_len - buf_offset;
+	} else {
+		buf_offset = dev->vhost_hlen;
+		buf_avail = buf_len - dev->vhost_hlen;
+	}
+
+	/* handle mirror port header */
+	mr_buf_addr = mr_buf_vec[mr_vec_idx].buf_addr;
+	mr_buf_iova = mr_buf_vec[mr_vec_idx].buf_iova;
+	mr_buf_len = mr_buf_vec[mr_vec_idx].buf_len;
+
+	if (unlikely(mr_buf_len < mr_dev->vhost_hlen && mr_nr_vec <= 1))
+		return -1;
+
+	mr_hdr_addr = mr_buf_addr;
+	if (unlikely(mr_buf_len < mr_dev->vhost_hlen)) {
+		memset(&mr_tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
+		mr_hdr = &mr_tmp_hdr;
+	} else {
+		mr_hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)mr_hdr_addr;
+	}
+
+	if (unlikely(mr_buf_len < mr_dev->vhost_hlen)) {
+		mr_buf_offset = mr_dev->vhost_hlen - mr_buf_len;
+		mr_vec_idx++;
+		mr_buf_addr = mr_buf_vec[mr_vec_idx].buf_addr;
+		mr_buf_iova = mr_buf_vec[mr_vec_idx].buf_iova;
+		mr_buf_len = mr_buf_vec[mr_vec_idx].buf_len;
+		mr_buf_avail = mr_buf_len - mr_buf_offset;
+	} else {
+		mr_buf_offset = mr_dev->vhost_hlen;
+		mr_buf_avail = mr_buf_len - mr_dev->vhost_hlen;
+	}
+
+	/* copy mbuf to desc */
+	mbuf_avail = rte_pktmbuf_data_len(m);
+	mbuf_offset = 0;
+
+	if (async_iter_initialize(dev, async))
+		return -1;
+
+	while (mbuf_avail != 0 || m->next != NULL) {
+		/* done with current buf, get the next one */
+		if (buf_avail == 0) {
+			vec_idx++;
+			if (unlikely(vec_idx >= nr_vec))
+				goto error;
+
+			buf_addr = buf_vec[vec_idx].buf_addr;
+			buf_iova = buf_vec[vec_idx].buf_iova;
+			buf_len = buf_vec[vec_idx].buf_len;
+
+			buf_offset = 0;
+			buf_avail = buf_len;
+		}
+
+		if (mr_buf_avail == 0) {
+			mr_vec_idx++;
+			if (unlikely(mr_vec_idx >= mr_nr_vec))
+				goto error;
+
+			mr_buf_addr = mr_buf_vec[mr_vec_idx].buf_addr;
+			mr_buf_iova = mr_buf_vec[mr_vec_idx].buf_iova;
+			mr_buf_len = mr_buf_vec[mr_vec_idx].buf_len;
+
+			mr_buf_offset = 0;
+			mr_buf_avail = mr_buf_len;
+		}
+
+		/* done with current mbuf, get the next one */
+		if (mbuf_avail == 0) {
+			m = m->next;
+
+			mbuf_offset = 0;
+			mbuf_avail = rte_pktmbuf_data_len(m);
+		}
+
+		if (hdr_addr) {
+			virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
+
+			if (rxvq_is_mergeable(dev))
+				ASSIGN_UNLESS_EQUAL(hdr->num_buffers,
+							num_buffers);
+
+			if (unlikely(hdr == &tmp_hdr)) {
+				copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
+			} else {
+				PRINT_PACKET(dev, (uintptr_t)hdr_addr,
+						dev->vhost_hlen, 0);
+				vhost_log_cache_write_iova(dev, vq,
+								buf_vec[0].buf_iova,
+								dev->vhost_hlen);
+			}
+
+			hdr_addr = 0;
+		}
+
+		if (mr_hdr_addr) {
+			rte_memcpy(&mr_hdr->hdr, &hdr->hdr, sizeof(struct virtio_net_hdr));
+			if (rxvq_is_mergeable(mr_dev))
+				ASSIGN_UNLESS_EQUAL(mr_hdr->num_buffers,
+							mr_num_buffers);
+
+			if (unlikely(mr_hdr == &mr_tmp_hdr)) {
+				copy_vnet_hdr_to_desc(mr_dev, mr_vq, mr_buf_vec, mr_hdr);
+			} else {
+				PRINT_PACKET(mr_dev, (uintptr_t)mr_hdr_addr,
+						mr_dev->vhost_hlen, 0);
+				vhost_log_cache_write_iova(mr_dev, mr_vq,
+								mr_buf_vec[0].buf_iova,
+								mr_dev->vhost_hlen);
+			}
+
+			mr_hdr_addr = 0;
+		}
+
+		cpy_len = RTE_MIN(buf_avail, mbuf_avail);
+		cpy_len = RTE_MIN(mr_buf_avail, cpy_len);
+
+		if (async_mirror_fill_seg(dev, vq, buf_iova + buf_offset,
+					mr_dev,	mr_buf_iova + mr_buf_offset,
+					m, mbuf_offset, cpy_len, true) < 0)
+			goto error;
+
+		mbuf_avail -= cpy_len;
+		mbuf_offset += cpy_len;
+		buf_avail -= cpy_len;
+		buf_offset += cpy_len;
+		mr_buf_avail -= cpy_len;
+		mr_buf_offset += cpy_len;
+	}
+
+	async_iter_finalize(async);
+
+	return 0;
+error:
+	async_iter_cancel(async);
+
+	return -1;
+}
+
+static __rte_noinline uint32_t
+virtio_dev_ingress_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			struct virtio_net *mr_dev, struct vhost_virtqueue *mr_vq,
+			struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id)
+{
+	struct buf_vector buf_vec[BUF_VECTOR_MAX];
+	struct buf_vector mr_buf_vec[BUF_VECTOR_MAX];
+	uint32_t pkt_idx = 0;
+	uint16_t num_buffers;
+	uint16_t mr_num_buffers;
+	uint16_t avail_head;
+	uint16_t mr_avail_head;
+
+	struct vhost_async *async = vq->async;
+	struct vhost_async *mr_async = mr_vq->async;
+	struct async_inflight_info *pkts_info = async->pkts_info;
+	uint32_t pkt_err = 0;
+	int32_t n_xfer;
+	uint16_t slot_idx = 0;
+	uint16_t nr_vec, mr_nr_vec;
+
+	/* The ordering between avail index and desc reads need to be enforced. */
+	avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);
+	mr_avail_head = __atomic_load_n(&mr_vq->avail->idx, __ATOMIC_ACQUIRE);
+
+	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
+	rte_prefetch0(&mr_vq->avail->ring[mr_vq->last_avail_idx & (mr_vq->size - 1)]);
+
+	async_iter_reset(async);
+
+	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
+		uint16_t vhost_hlen = dev->vhost_hlen > mr_dev->vhost_hlen ?
+					dev->vhost_hlen : mr_dev->vhost_hlen;
+		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vhost_hlen;
+
+
+		if (unlikely(reserve_avail_buf_split(dev, vq, pkt_len, buf_vec,
+					&num_buffers, avail_head, &nr_vec) < 0)) {
+			VHOST_LOG_DATA(dev->ifname, DEBUG,
+					"failed to get enough desc from vring\n");
+			vq->shadow_used_idx -= num_buffers;
+			break;
+		}
+
+		if (unlikely(reserve_avail_buf_split(mr_dev, mr_vq, pkt_len, mr_buf_vec,
+					&mr_num_buffers, mr_avail_head, &mr_nr_vec) < 0)) {
+			VHOST_LOG_DATA(mr_dev->ifname, DEBUG,
+					"failed to get enough desc from mirror vring\n");
+			vq->shadow_used_idx -= num_buffers;
+			mr_vq->shadow_used_idx -= mr_num_buffers;
+			break;
+		}
+
+		VHOST_LOG_DATA(dev->ifname, DEBUG,
+				"current index %d | end index %d\n",
+				vq->last_avail_idx, vq->last_avail_idx + num_buffers);
+
+		if (ingress_mbuf_to_desc(dev, vq, mr_dev, mr_vq,
+					buf_vec, nr_vec, num_buffers,
+					mr_buf_vec, mr_nr_vec, mr_num_buffers,
+					pkts[pkt_idx]) < 0) {
+			vq->shadow_used_idx -= num_buffers;
+			mr_vq->shadow_used_idx -= mr_num_buffers;
+			break;
+		}
+
+		slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1);
+		pkts_info[slot_idx].descs = num_buffers;
+		pkts_info[slot_idx].nr_buffers = mr_num_buffers;
+		pkts_info[slot_idx].mbuf = pkts[pkt_idx];
+
+		vq->last_avail_idx += num_buffers;
+		mr_vq->last_avail_idx += mr_num_buffers;
+
+	}
+
+	if (unlikely(pkt_idx == 0))
+		return 0;
+
+	n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx,
+			async->iov_iter, pkt_idx);
+
+	pkt_err = pkt_idx - n_xfer;
+	if (unlikely(pkt_err)) {
+		uint16_t num_descs = 0;
+		uint16_t mr_num_descs = 0;
+
+		VHOST_LOG_DATA(dev->ifname, DEBUG,
+			"%s: failed to transfer %u packets for queue %u.\n",
+			__func__, pkt_err, vq->index);
+
+		/* update number of completed packets */
+		pkt_idx = n_xfer;
+
+		/* calculate the sum of descriptors to revert */
+		while (pkt_err-- > 0) {
+			num_descs += pkts_info[slot_idx & (vq->size - 1)].descs;
+			mr_num_descs += pkts_info[slot_idx & (vq->size - 1)].nr_buffers;
+			slot_idx--;
+		}
+
+		/* recover shadow used ring and available ring */
+		vq->shadow_used_idx -= num_descs;
+		vq->last_avail_idx -= num_descs;
+		mr_vq->shadow_used_idx -= mr_num_descs;
+		mr_vq->last_avail_idx -= mr_num_descs;
+	}
+
+	/* keep used descriptors */
+	if (likely(vq->shadow_used_idx)) {
+		uint16_t to = async->desc_idx_split & (vq->size - 1);
+
+		store_dma_desc_info_split(vq->shadow_used_split,
+				async->descs_split, vq->size, 0, to,
+				vq->shadow_used_idx);
+
+		async->desc_idx_split += vq->shadow_used_idx;
+		async->pkts_idx += pkt_idx;
+		if (async->pkts_idx >= vq->size)
+			async->pkts_idx -= vq->size;
+
+		async->pkts_inflight_n += pkt_idx;
+		vq->shadow_used_idx = 0;
+	}
+
+	if (likely(mr_vq->shadow_used_idx)) {
+		uint16_t to = mr_async->desc_idx_split & (mr_vq->size - 1);
+
+		store_dma_desc_info_split(mr_vq->shadow_used_split, mr_async->descs_split,
+					mr_vq->size, 0, to, mr_vq->shadow_used_idx);
+
+		mr_async->desc_idx_split += mr_vq->shadow_used_idx;
+		mr_vq->shadow_used_idx = 0;
+	}
+
+	return pkt_idx;
+}
+
+static __rte_always_inline uint32_t
+virtio_dev_ingress_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			struct virtio_net *mr_dev, struct vhost_virtqueue *mr_vq,
+			struct rte_mbuf **pkts, uint32_t count,
+			int16_t dma_id, uint16_t vchan_id)
+{
+	uint32_t nb_tx = 0;
+
+	VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__);
+
+	if (unlikely(!dma_copy_track[dma_id].vchans ||
+				!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
+		VHOST_LOG_DATA(dev->ifname, ERR,
+			"%s: invalid channel %d:%u.\n",
+			 __func__, dma_id, vchan_id);
+		return 0;
+	}
+
+	rte_spinlock_lock(&vq->access_lock);
+	rte_spinlock_lock(&mr_vq->access_lock);
+
+	if (unlikely(!vq->enabled || !vq->async ||
+		!mr_vq->enabled || !mr_vq->async))
+		goto out_access_unlock;
+
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(vq);
+	if (mr_dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(mr_vq);
+
+	if (unlikely(!vq->access_ok))
+		if (unlikely(vring_translate(dev, vq) < 0))
+			goto out;
+	if (unlikely(!mr_vq->access_ok))
+		if (unlikely(vring_translate(mr_dev, mr_vq) < 0))
+			goto out;
+
+	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
+	if (count == 0)
+		goto out;
+
+	nb_tx = virtio_dev_ingress_async_submit_split(dev, vq,
+						mr_dev, mr_vq,
+						pkts, count, dma_id, vchan_id);
+
+out:
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_unlock(vq);
+	if (mr_dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_unlock(mr_vq);
+
+out_access_unlock:
+	rte_spinlock_unlock(&mr_vq->access_lock);
+	rte_spinlock_unlock(&vq->access_lock);
+
+	return nb_tx;
+}
+
+uint16_t
+rte_vhost_submit_ingress_mirroring_burst(int vid, uint16_t queue_id,
+			int mr_vid, uint16_t mr_queue_id,
+			struct rte_mbuf **pkts, uint16_t count,
+			int16_t dma_id, uint16_t vchan_id)
+{
+	struct virtio_net *dev = get_device(vid);
+	struct virtio_net *mr_dev = get_device(mr_vid);
+
+	if (!dev || !mr_dev)
+		return 0;
+
+	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
+		VHOST_LOG_DATA(dev->ifname, ERR,
+			"%s: built-in vhost net backend is disabled.\n",
+			__func__);
+		return 0;
+	}
+	if (unlikely(!(mr_dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
+		VHOST_LOG_DATA(mr_dev->ifname, ERR,
+			"%s: built-in vhost net backend is disabled.\n",
+			__func__);
+		return 0;
+	}
+
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
+		VHOST_LOG_DATA(dev->ifname, ERR,
+			"%s: invalid virtqueue idx %d.\n",
+			__func__, queue_id);
+		return 0;
+	}
+	if (unlikely(!is_valid_virt_queue_idx(mr_queue_id, 0, mr_dev->nr_vring))) {
+		VHOST_LOG_DATA(mr_dev->ifname, ERR,
+			"%s: invalid virtqueue idx %d.\n",
+			__func__, mr_queue_id);
+		return 0;
+	}
+
+	return virtio_dev_ingress_async_submit(dev, dev->virtqueue[queue_id],
+				mr_dev, mr_dev->virtqueue[mr_queue_id],
+				pkts, count, dma_id, vchan_id);
+}
 
 static __rte_always_inline uint16_t
 async_poll_egress_completed_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
@@ -4313,56 +4941,6 @@ async_poll_egress_completed_split(struct virtio_net *dev, struct vhost_virtqueue
 	return nr_cpl_pkts;
 }
 
-static __rte_always_inline int
-egress_async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, uint64_t buf_iova,
-		struct virtio_net *mr_dev, uint64_t mr_buf_iova,
-		struct rte_mbuf *m, uint32_t mbuf_offset, uint32_t cpy_len)
-{
-	struct vhost_async *async = vq->async;
-	uint64_t mapped_len, mr_mapped_len;
-	uint32_t buf_offset = 0;
-	void *src, *dst, *mr_dst;
-	void *host_iova, *mr_host_iova;
-
-	while (cpy_len) {
-		host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev,
-					buf_iova + buf_offset, cpy_len,	&mapped_len);
-		if (unlikely(!host_iova)) {
-			VHOST_LOG_DATA(dev->ifname, ERR, "%s: failed to get host iova.\n", __func__);
-			return -1;
-		}
-
-		mr_host_iova = (void *)(uintptr_t)gpa_to_first_hpa(mr_dev,
-					mr_buf_iova + buf_offset, cpy_len, &mr_mapped_len);
-		if (unlikely(!mr_host_iova)) {
-			VHOST_LOG_DATA(mr_dev->ifname, ERR, "%s: failed to get mirror hpa.\n", __func__);
-			return -1;
-		}
-
-		if (unlikely(mr_mapped_len != mapped_len)) {
-			VHOST_LOG_DATA(dev->ifname, ERR, "original mapped len is not equal to mirror len\n");
-			return -1;
-		}
-
-		src = host_iova;
-		dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);
-
-		mr_dst = mr_host_iova;
-
-		if (unlikely(async_iter_add_iovec(dev, async, src, dst, (size_t)mapped_len)))
-			return -1;
-
-		if (unlikely(async_iter_add_iovec(dev, async, src, mr_dst, (size_t)mapped_len)))
-			return -1;
-
-		cpy_len -= (uint32_t)mapped_len;
-		mbuf_offset += (uint32_t)mapped_len;
-		buf_offset += (uint32_t)mapped_len;
-	}
-
-	return 0;
-}
-
 static __rte_always_inline int
 egress_async_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		  struct buf_vector *buf_vec, uint16_t nr_vec,
@@ -4477,9 +5055,9 @@ egress_async_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		cpy_len = RTE_MIN(buf_avail, mbuf_avail);
 		cpy_len = RTE_MIN(cpy_len, mr_buf_avail);
 
-		if (egress_async_fill_seg(dev, vq, buf_iova + buf_offset,
+		if (async_mirror_fill_seg(dev, vq, buf_iova + buf_offset,
 				mr_dev, mr_buf_iova + mr_buf_offset,
-				m, mbuf_offset, cpy_len) < 0)
+				m, mbuf_offset, cpy_len, false) < 0)
 			goto error;
 
 		mbuf_avail -= cpy_len;
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/2] vhost: add port mirroring function in the vhost lib
  2023-04-21  1:09 [PATCH 0/2] vhost: add port mirroring function in the vhost lib Cheng Jiang
  2023-04-21  1:09 ` [PATCH 1/2] vhost: add ingress API for port mirroring datapath Cheng Jiang
  2023-04-21  1:09 ` [PATCH 2/2] vhost: add egress " Cheng Jiang
@ 2023-05-03  9:36 ` Maxime Coquelin
  2023-05-08 12:23   ` Jiang, Cheng1
  2 siblings, 1 reply; 8+ messages in thread
From: Maxime Coquelin @ 2023-05-03  9:36 UTC (permalink / raw)
  To: Cheng Jiang, chenbo.xia
  Cc: dev, jiayu.hu, xuan.ding, wenwux.ma, yuanx.wang, xingguang.he,
	David Marchand

Hi Cheng,

On 4/21/23 03:09, Cheng Jiang wrote:
> Similar to the port mirroring function on the switch or router, this
> patch set implements such function on the Vhost lib. When
> data is sent to a front-end, it will also send the data to its mirror
> front-end. When data is received from a front-end, it will also send
> the data to its mirror front-end.

Why not just keep the mirroring in the switch/router?
I am really not convinced this is the way to go:
1. The API is too complex.
2. It requires async support.
3. There is too much code duplication; it increases virtio_net.c by
    30%, and that is without packed ring support.
4. If the mirror port is down for any reason, packets to/from the
    original port are dropped.
5. It seems to assume the negotiated features of the two ports are
    identical, e.g. the Virtio-net header length? If so, that's not a
    manageable solution.

Regards,
Maxime

> 
> Cheng Jiang (2):
>    vhost: add ingress API for port mirroring datapath
>    vhost: add egress API for port mirroring datapath
> 
>   lib/vhost/rte_vhost_async.h |   17 +
>   lib/vhost/version.map       |    3 +
>   lib/vhost/virtio_net.c      | 1266 +++++++++++++++++++++++++++++++++++
>   3 files changed, 1286 insertions(+)
> 
> --
> 2.35.1
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH 0/2] vhost: add port mirroring function in the vhost lib
  2023-05-03  9:36 ` [PATCH 0/2] vhost: add port mirroring function in the vhost lib Maxime Coquelin
@ 2023-05-08 12:23   ` Jiang, Cheng1
  2023-05-11  8:59     ` Xia, Chenbo
  0 siblings, 1 reply; 8+ messages in thread
From: Jiang, Cheng1 @ 2023-05-08 12:23 UTC (permalink / raw)
  To: Maxime Coquelin, Xia, Chenbo
  Cc: dev, Hu, Jiayu, Ding, Xuan, Ma, WenwuX, Wang, YuanX, He,
	Xingguang, David Marchand

Hi Maxime,

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Wednesday, May 3, 2023 5:37 PM
> To: Jiang, Cheng1 <cheng1.jiang@intel.com>; Xia, Chenbo
> <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan
> <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Wang, YuanX
> <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>; David
> Marchand <david.marchand@redhat.com>
> Subject: Re: [PATCH 0/2] vhost: add port mirroring function in the vhost lib
> 
> Hi Cheng,
> 
> On 4/21/23 03:09, Cheng Jiang wrote:
> > Similar to the port mirroring function on the switch or router, this
> > patch set implements such function on the Vhost lib. When data is sent
> > to a front-end, it will also send the data to its mirror front-end.
> > When data is received from a front-end, it will also send the data to
> > its mirror front-end.
> 
> Why not just keeping mirroring in the switch/router?
> I am really not convinced this is the way to go:
> 1. API is too complex
> 2. It requires async support
> 3. There is too much code duplication, it increases  virtio-net.c by
>     30%, and it is without packed ring support.
> 4. If mirror port is down for any reason, packets to/from the original
>     port are dropped.
> 5. It seems to assume negotiated features of the two ports are
>     identical, e.g. Virtio-net header length? If so, that's not a
>     manageable solution.

Thank you for your feedback.
I concur that placing the mirror function in the Vhost library is not ideal.
We are currently considering either implementing a mirror Vhost PMD or
adding a function to TestPMD to handle this functionality.
Would you please share your thoughts on this plan and let us know which
option you prefer?

Thanks a lot,
Cheng



> 
> Regards,
> Maxime
> 
> >
> > Cheng Jiang (2):
> >    vhost: add ingress API for port mirroring datapath
> >    vhost: add egress API for port mirroring datapath
> >
> >   lib/vhost/rte_vhost_async.h |   17 +
> >   lib/vhost/version.map       |    3 +
> >   lib/vhost/virtio_net.c      | 1266 +++++++++++++++++++++++++++++++++++
> >   3 files changed, 1286 insertions(+)
> >
> > --
> > 2.35.1
> >


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH 0/2] vhost: add port mirroring function in the vhost lib
  2023-05-08 12:23   ` Jiang, Cheng1
@ 2023-05-11  8:59     ` Xia, Chenbo
  2023-05-11 12:16       ` Maxime Coquelin
  0 siblings, 1 reply; 8+ messages in thread
From: Xia, Chenbo @ 2023-05-11  8:59 UTC (permalink / raw)
  To: Jiang, Cheng1, Maxime Coquelin
  Cc: dev, Hu, Jiayu, Ding, Xuan, Ma, WenwuX, Wang, YuanX, He,
	Xingguang, David Marchand

> -----Original Message-----
> From: Jiang, Cheng1 <cheng1.jiang@intel.com>
> Sent: Monday, May 8, 2023 8:23 PM
> To: Maxime Coquelin <maxime.coquelin@redhat.com>; Xia, Chenbo
> <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan
> <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Wang, YuanX
> <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>; David
> Marchand <david.marchand@redhat.com>
> Subject: RE: [PATCH 0/2] vhost: add port mirroring function in the vhost
> lib
> 
> Hi Maxime,
> 
> > -----Original Message-----
> > From: Maxime Coquelin <maxime.coquelin@redhat.com>
> > Sent: Wednesday, May 3, 2023 5:37 PM
> > To: Jiang, Cheng1 <cheng1.jiang@intel.com>; Xia, Chenbo
> > <chenbo.xia@intel.com>
> > Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan
> > <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Wang, YuanX
> > <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>; David
> > Marchand <david.marchand@redhat.com>
> > Subject: Re: [PATCH 0/2] vhost: add port mirroring function in the vhost
> lib
> >
> > Hi Cheng,
> >
> > On 4/21/23 03:09, Cheng Jiang wrote:
> > > Similar to the port mirroring function on the switch or router, this
> > > patch set implements such function on the Vhost lib. When data is sent
> > > to a front-end, it will also send the data to its mirror front-end.
> > > When data is received from a front-end, it will also send the data to
> > > its mirror front-end.
> >
> > Why not just keeping mirroring in the switch/router?
> > I am really not convinced this is the way to go:
> > 1. API is too complex
> > 2. It requires async support
> > 3. There is too much code duplication, it increases  virtio-net.c by
> >     30%, and it is without packed ring support.
> > 4. If mirror port is down for any reason, packets to/from the original
> >     port are dropped.
> > 5. It seems to assume negotiated features of the two ports are
> >     identical, e.g. Virtio-net header length? If so, that's not a
> >     manageable solution.
> 
> Thank you for your feedback.
> I concur that placing the mirror function in the Vhost library is not
> ideal. We are currently considering implementing either a mirror Vhost PMD,
> or adding a function to TestPMD to handle this functionality.
> Would you please share your thoughts on this plan and let us know which
> option you prefer?

Based on the current implementation, it seems that the vhost lib can remain
unaware of the mirroring usage. Moving this logic into an application such
as testpmd seems to make more sense.

Thanks,
Chenbo

> 
> Thanks a lot,
> Cheng
> 
> 
> 
> >
> > Regards,
> > Maxime
> >
> > >
> > > Cheng Jiang (2):
> > >    vhost: add ingress API for port mirroring datapath
> > >    vhost: add egress API for port mirroring datapath
> > >
> > >   lib/vhost/rte_vhost_async.h |   17 +
> > >   lib/vhost/version.map       |    3 +
> > >   lib/vhost/virtio_net.c      | 1266
> +++++++++++++++++++++++++++++++++++
> > >   3 files changed, 1286 insertions(+)
> > >
> > > --
> > > 2.35.1
> > >


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/2] vhost: add port mirroring function in the vhost lib
  2023-05-11  8:59     ` Xia, Chenbo
@ 2023-05-11 12:16       ` Maxime Coquelin
  2023-05-17  7:34         ` Jiang, Cheng1
  0 siblings, 1 reply; 8+ messages in thread
From: Maxime Coquelin @ 2023-05-11 12:16 UTC (permalink / raw)
  To: Xia, Chenbo, Jiang, Cheng1
  Cc: dev, Hu, Jiayu, Ding, Xuan, Ma, WenwuX, Wang, YuanX, He,
	Xingguang, David Marchand



On 5/11/23 10:59, Xia, Chenbo wrote:
>> -----Original Message-----
>> From: Jiang, Cheng1 <cheng1.jiang@intel.com>
>> Sent: Monday, May 8, 2023 8:23 PM
>> To: Maxime Coquelin <maxime.coquelin@redhat.com>; Xia, Chenbo
>> <chenbo.xia@intel.com>
>> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan
>> <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Wang, YuanX
>> <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>; David
>> Marchand <david.marchand@redhat.com>
>> Subject: RE: [PATCH 0/2] vhost: add port mirroring function in the vhost
>> lib
>>
>> Hi Maxime,
>>
>>> -----Original Message-----
>>> From: Maxime Coquelin <maxime.coquelin@redhat.com>
>>> Sent: Wednesday, May 3, 2023 5:37 PM
>>> To: Jiang, Cheng1 <cheng1.jiang@intel.com>; Xia, Chenbo
>>> <chenbo.xia@intel.com>
>>> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan
>>> <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Wang, YuanX
>>> <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>; David
>>> Marchand <david.marchand@redhat.com>
>>> Subject: Re: [PATCH 0/2] vhost: add port mirroring function in the vhost
>> lib
>>>
>>> Hi Cheng,
>>>
>>> On 4/21/23 03:09, Cheng Jiang wrote:
>>>> Similar to the port mirroring function on the switch or router, this
>>>> patch set implements such function on the Vhost lib. When data is sent
>>>> to a front-end, it will also send the data to its mirror front-end.
>>>> When data is received from a front-end, it will also send the data to
>>>> its mirror front-end.
>>>
>>> Why not just keep the mirroring in the switch/router?
>>> I am really not convinced this is the way to go:
>>> 1. API is too complex
>>> 2. It requires async support
>>> 3. There is too much code duplication; it increases virtio-net.c by
>>>      30%, and that is without packed ring support.
>>> 4. If mirror port is down for any reason, packets to/from the original
>>>      port are dropped.
>>> 5. It seems to assume negotiated features of the two ports are
>>>      identical, e.g. Virtio-net header length? If so, that's not a
>>>      manageable solution.
>>
>> Thank you for your feedback.
>> I concur that placing the mirror function in the Vhost library is not
>> ideal. We are currently considering either implementing a mirror Vhost PMD
>> or adding a function to TestPMD to handle this functionality.
>> Would you please share your thoughts on this plan and let us know which
>> option you prefer?
> 
> Based on the current implementation, it seems that the vhost lib could
> remain ignorant of the mirroring usage. Moving this logic into an
> application like testpmd seems to make more sense.

I agree with Chenbo; it should be done at the application level, which
would also make it possible to mirror non-Vhost ports.

Thanks,
Maxime

> Thanks,
> Chenbo
> 
>>
>> Thanks a lot,
>> Cheng
>>
>>
>>
>>>
>>> Regards,
>>> Maxime
>>>
>>>>
>>>> Cheng Jiang (2):
>>>>     vhost: add ingress API for port mirroring datapath
>>>>     vhost: add egress API for port mirroring datapath
>>>>
>>>>    lib/vhost/rte_vhost_async.h |   17 +
>>>>    lib/vhost/version.map       |    3 +
>>>>    lib/vhost/virtio_net.c      | 1266
>> +++++++++++++++++++++++++++++++++++
>>>>    3 files changed, 1286 insertions(+)
>>>>
>>>> --
>>>> 2.35.1
>>>>
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH 0/2] vhost: add port mirroring function in the vhost lib
  2023-05-11 12:16       ` Maxime Coquelin
@ 2023-05-17  7:34         ` Jiang, Cheng1
  0 siblings, 0 replies; 8+ messages in thread
From: Jiang, Cheng1 @ 2023-05-17  7:34 UTC (permalink / raw)
  To: Maxime Coquelin, Xia, Chenbo
  Cc: dev, Hu, Jiayu, Ding, Xuan, Ma, WenwuX, Wang, YuanX, He,
	Xingguang, David Marchand

Hi Maxime/Chenbo,

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Thursday, May 11, 2023 8:16 PM
> To: Xia, Chenbo <chenbo.xia@intel.com>; Jiang, Cheng1
> <cheng1.jiang@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan
> <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Wang, YuanX
> <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>; David
> Marchand <david.marchand@redhat.com>
> Subject: Re: [PATCH 0/2] vhost: add port mirroring function in the vhost lib
> 
> 
> 
> On 5/11/23 10:59, Xia, Chenbo wrote:
> >> -----Original Message-----
> >> From: Jiang, Cheng1 <cheng1.jiang@intel.com>
> >> Sent: Monday, May 8, 2023 8:23 PM
> >> To: Maxime Coquelin <maxime.coquelin@redhat.com>; Xia, Chenbo
> >> <chenbo.xia@intel.com>
> >> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan
> >> <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Wang,
> YuanX
> >> <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>; David
> >> Marchand <david.marchand@redhat.com>
> >> Subject: RE: [PATCH 0/2] vhost: add port mirroring function in the
> >> vhost lib
> >>
> >> Hi Maxime,
> >>
> >>> -----Original Message-----
> >>> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> >>> Sent: Wednesday, May 3, 2023 5:37 PM
> >>> To: Jiang, Cheng1 <cheng1.jiang@intel.com>; Xia, Chenbo
> >>> <chenbo.xia@intel.com>
> >>> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan
> >>> <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Wang,
> YuanX
> >>> <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>;
> >>> David Marchand <david.marchand@redhat.com>
> >>> Subject: Re: [PATCH 0/2] vhost: add port mirroring function in the
> >>> vhost
> >> lib
> >>>
> >>> Hi Cheng,
> >>>
> >>> On 4/21/23 03:09, Cheng Jiang wrote:
> >>>> Similar to the port mirroring function on the switch or router,
> >>>> this patch set implements such function on the Vhost lib. When data
> >>>> is sent to a front-end, it will also send the data to its mirror front-end.
> >>>> When data is received from a front-end, it will also send the data
> >>>> to its mirror front-end.
> >>>
> >>> Why not just keep the mirroring in the switch/router?
> >>> I am really not convinced this is the way to go:
> >>> 1. API is too complex
> >>> 2. It requires async support
> >>> 3. There is too much code duplication; it increases virtio-net.c by
> >>>      30%, and that is without packed ring support.
> >>> 4. If mirror port is down for any reason, packets to/from the original
> >>>      port are dropped.
> >>> 5. It seems to assume negotiated features of the two ports are
> >>>      identical, e.g. Virtio-net header length? If so, that's not a
> >>>      manageable solution.
> >>
> >> Thank you for your feedback.
> >> I concur that placing the mirror function in the Vhost library is not
> >> ideal. We are currently considering either implementing a mirror
> >> Vhost PMD or adding a function to TestPMD to handle this functionality.
> >> Would you please share your thoughts on this plan and let us know
> >> which option you prefer?
> >
> > Based on the current implementation, it seems that the vhost lib could
> > remain ignorant of the mirroring usage. Moving this logic into an
> > application like testpmd seems to make more sense.
> 
> I agree with Chenbo; it should be done at the application level, which
> would also make it possible to mirror non-Vhost ports.

I mostly agree with your opinions. However, the workload of implementing a
general mirror framework that supports all ports at the application level
is too large for us at the moment. Currently, we are more focused on
demonstrating the performance advantages of async Vhost and DSA with this
mirror work. Therefore, we may need to temporarily put this upstream process
on hold.

I greatly appreciate your feedback and help. Perhaps we can discuss the
details in the future. Thank you again!

Best regards,
Cheng

> 
> Thanks,
> Maxime
> 
> > Thanks,
> > Chenbo
> >
> >>
> >> Thanks a lot,
> >> Cheng
> >>
> >>
> >>
> >>>
> >>> Regards,
> >>> Maxime
> >>>
> >>>>
> >>>> Cheng Jiang (2):
> >>>>     vhost: add ingress API for port mirroring datapath
> >>>>     vhost: add egress API for port mirroring datapath
> >>>>
> >>>>    lib/vhost/rte_vhost_async.h |   17 +
> >>>>    lib/vhost/version.map       |    3 +
> >>>>    lib/vhost/virtio_net.c      | 1266
> >> +++++++++++++++++++++++++++++++++++
> >>>>    3 files changed, 1286 insertions(+)
> >>>>
> >>>> --
> >>>> 2.35.1
> >>>>
> >


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-05-17  7:34 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-21  1:09 [PATCH 0/2] vhost: add port mirroring function in the vhost lib Cheng Jiang
2023-04-21  1:09 ` [PATCH 1/2] vhost: add ingress API for port mirroring datapath Cheng Jiang
2023-04-21  1:09 ` [PATCH 2/2] vhost: add egress " Cheng Jiang
2023-05-03  9:36 ` [PATCH 0/2] vhost: add port mirroring function in the vhost lib Maxime Coquelin
2023-05-08 12:23   ` Jiang, Cheng1
2023-05-11  8:59     ` Xia, Chenbo
2023-05-11 12:16       ` Maxime Coquelin
2023-05-17  7:34         ` Jiang, Cheng1
