DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH v1 0/3] examples/vhost: sample code refactor
@ 2020-12-18 11:33 Cheng Jiang
  2020-12-18 11:33 ` [dpdk-dev] [PATCH v1 1/3] examples/vhost: add ioat ring space count and check Cheng Jiang
                   ` (10 more replies)
  0 siblings, 11 replies; 44+ messages in thread
From: Cheng Jiang @ 2020-12-18 11:33 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Refactor the vhost sample code. Add ioat ring space count and check
in ioat callback, optimize vhost data path for batch enqueue, and
refactor vhost async data path.

Cheng Jiang (3):
  examples/vhost: add ioat ring space count and check
  examples/vhost: optimize vhost data path for batch
  examples/vhost: refactor vhost async data path

 examples/vhost/ioat.c |  15 ++---
 examples/vhost/main.c | 144 ++++++++++++++++++++++++++++++------------
 examples/vhost/main.h |   2 +-
 3 files changed, 110 insertions(+), 51 deletions(-)

--
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v1 1/3] examples/vhost: add ioat ring space count and check
  2020-12-18 11:33 [dpdk-dev] [PATCH v1 0/3] examples/vhost: sample code refactor Cheng Jiang
@ 2020-12-18 11:33 ` Cheng Jiang
  2020-12-18 11:33 ` [dpdk-dev] [PATCH v1 2/3] examples/vhost: optimize vhost data path for batch Cheng Jiang
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 44+ messages in thread
From: Cheng Jiang @ 2020-12-18 11:33 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Add ioat ring space count and check. If the ioat ring space is not
enough for the next async vhost packet enqueue, just return to prevent
enqueue failure.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/ioat.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 71d8a1f1f5..b0b04aa453 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -17,6 +17,7 @@ struct packet_tracker {
 	unsigned short next_read;
 	unsigned short next_write;
 	unsigned short last_remain;
+	unsigned short ioat_space;
 };
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
@@ -113,7 +114,7 @@ open_ioat(const char *value)
 			goto out;
 		}
 		rte_rawdev_start(dev_id);
-
+		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
 		dma_info->nr++;
 		i++;
 	}
@@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 			src = descs[i_desc].src;
 			dst = descs[i_desc].dst;
 			i_seg = 0;
+			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
+				break;
 			while (i_seg < src->nr_segs) {
-				/*
-				 * TODO: Assuming that the ring space of the
-				 * IOAT device is large enough, so there is no
-				 * error here, and the actual error handling
-				 * will be added later.
-				 */
 				rte_ioat_enqueue_copy(dev_id,
 					(uintptr_t)(src->iov[i_seg].iov_base)
 						+ src->offset,
@@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 				i_seg++;
 			}
 			write &= mask;
-			cb_tracker[dev_id].size_track[write] = i_seg;
+			cb_tracker[dev_id].size_track[write] = src->nr_segs;
+			cb_tracker[dev_id].ioat_space -= src->nr_segs;
 			write++;
 		}
 	} else {
@@ -186,6 +184,7 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 		int dev_id = dma_bind[vid].dmas[queue_id * 2
 				+ VIRTIO_RXQ].dev_id;
 		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
+		cb_tracker[dev_id].ioat_space += n_seg;
 		n_seg += cb_tracker[dev_id].last_remain;
 		if (!n_seg)
 			return 0;
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v1 2/3] examples/vhost: optimize vhost data path for batch
  2020-12-18 11:33 [dpdk-dev] [PATCH v1 0/3] examples/vhost: sample code refactor Cheng Jiang
  2020-12-18 11:33 ` [dpdk-dev] [PATCH v1 1/3] examples/vhost: add ioat ring space count and check Cheng Jiang
@ 2020-12-18 11:33 ` Cheng Jiang
  2020-12-18 11:33 ` [dpdk-dev] [PATCH v1 3/3] examples/vhost: refactor vhost async data path Cheng Jiang
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 44+ messages in thread
From: Cheng Jiang @ 2020-12-18 11:33 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Change the vm2vm data path to batch enqueue for better performance.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/main.c | 84 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 75 insertions(+), 9 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 8d8c3038bf..28226a4ff7 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -182,6 +182,11 @@ struct mbuf_table {
 /* TX queue for each data core. */
 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
 
+static uint64_t vhost_tsc[MAX_VHOST_DEVICE];
+
+/* TX queue for each vhost device. */
+struct mbuf_table vhost_m_table[MAX_VHOST_DEVICE];
+
 #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
 				 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN       4
@@ -804,6 +809,13 @@ unlink_vmdq(struct vhost_dev *vdev)
 	}
 }
 
+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+	while (n--)
+		rte_pktmbuf_free(pkts[n]);
+}
+
 static __rte_always_inline void
 virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
@@ -837,6 +849,40 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	}
 }
 
+static __rte_always_inline void
+drain_vhost(struct vhost_dev *dst_vdev, struct rte_mbuf **m, uint16_t nr_xmit)
+{
+	uint16_t ret, nr_cpl;
+	struct rte_mbuf *m_cpl[MAX_PKT_BURST];
+
+	if (builtin_net_driver) {
+		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, m, nr_xmit);
+	} else if (async_vhost_driver) {
+		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
+						m, nr_xmit);
+		dst_vdev->nr_async_pkts += ret;
+		free_pkts(&m[ret], nr_xmit - ret);
+
+		while (likely(dst_vdev->nr_async_pkts)) {
+			nr_cpl = rte_vhost_poll_enqueue_completed(dst_vdev->vid,
+					VIRTIO_RXQ, m_cpl, MAX_PKT_BURST);
+			dst_vdev->nr_async_pkts -= nr_cpl;
+			free_pkts(m_cpl, nr_cpl);
+		}
+	} else {
+		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
+						m, nr_xmit);
+	}
+
+	if (enable_stats) {
+		rte_atomic64_add(&dst_vdev->stats.rx_total_atomic, nr_xmit);
+		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
+	}
+
+	if (!async_vhost_driver)
+		free_pkts(m, nr_xmit);
+}
+
 /*
  * Check if the packet destination MAC address is for a local device. If so then put
  * the packet on that devices RX queue. If not then return.
@@ -846,6 +892,7 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
 	struct rte_ether_hdr *pkt_hdr;
 	struct vhost_dev *dst_vdev;
+	struct mbuf_table *vhost_txq;
 
 	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
@@ -869,7 +916,19 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 		return 0;
 	}
 
-	virtio_xmit(dst_vdev, vdev, m);
+	vhost_txq = &vhost_m_table[dst_vdev->vid];
+	vhost_txq->m_table[vhost_txq->len++] = m;
+
+	if (enable_stats) {
+		vdev->stats.tx_total++;
+		vdev->stats.tx++;
+	}
+
+	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
+		drain_vhost(dst_vdev, vhost_txq->m_table, MAX_PKT_BURST);
+		vhost_txq->len = 0;
+		vhost_tsc[dst_vdev->vid] = rte_rdtsc();
+	}
 	return 0;
 }
 
@@ -940,13 +999,6 @@ static void virtio_tx_offload(struct rte_mbuf *m)
 	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
 }
 
-static inline void
-free_pkts(struct rte_mbuf **pkts, uint16_t n)
-{
-	while (n--)
-		rte_pktmbuf_free(pkts[n]);
-}
-
 static __rte_always_inline void
 do_drain_mbuf_table(struct mbuf_table *tx_q)
 {
@@ -986,7 +1038,6 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 
 	/*check if destination is local VM*/
 	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
-		rte_pktmbuf_free(m);
 		return;
 	}
 
@@ -1144,8 +1195,10 @@ static __rte_always_inline void
 drain_virtio_tx(struct vhost_dev *vdev)
 {
 	struct rte_mbuf *pkts[MAX_PKT_BURST];
+	struct mbuf_table *vhost_txq;
 	uint16_t count;
 	uint16_t i;
+	uint64_t cur_tsc;
 
 	if (builtin_net_driver) {
 		count = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool,
@@ -1163,6 +1216,19 @@ drain_virtio_tx(struct vhost_dev *vdev)
 
 	for (i = 0; i < count; ++i)
 		virtio_tx_route(vdev, pkts[i], vlan_tags[vdev->vid]);
+
+	vhost_txq = &vhost_m_table[vdev->vid];
+	cur_tsc = rte_rdtsc();
+	if (unlikely(cur_tsc - vhost_tsc[vdev->vid] > MBUF_TABLE_DRAIN_TSC)) {
+		if (vhost_txq->len) {
+			RTE_LOG_DP(DEBUG, VHOST_DATA,
+				"Vhost tX queue drained after timeout with burst size %u\n",
+				vhost_txq->len);
+			drain_vhost(vdev, vhost_txq->m_table, vhost_txq->len);
+			vhost_txq->len = 0;
+			vhost_tsc[vdev->vid] = cur_tsc;
+		}
+	}
 }
 
 /*
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v1 3/3] examples/vhost: refactor vhost async data path
  2020-12-18 11:33 [dpdk-dev] [PATCH v1 0/3] examples/vhost: sample code refactor Cheng Jiang
  2020-12-18 11:33 ` [dpdk-dev] [PATCH v1 1/3] examples/vhost: add ioat ring space count and check Cheng Jiang
  2020-12-18 11:33 ` [dpdk-dev] [PATCH v1 2/3] examples/vhost: optimize vhost data path for batch Cheng Jiang
@ 2020-12-18 11:33 ` Cheng Jiang
  2020-12-22  8:36 ` [dpdk-dev] [PATCH v2 0/2] examples/vhost: sample code refactor Cheng Jiang
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 44+ messages in thread
From: Cheng Jiang @ 2020-12-18 11:33 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Support the latest async vhost API, refactor the vhost async data path,
and clean up some code.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/main.c | 88 ++++++++++++++++++++-----------------------
 examples/vhost/main.h |  2 +-
 2 files changed, 42 insertions(+), 48 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 28226a4ff7..0113147876 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -817,26 +817,26 @@ free_pkts(struct rte_mbuf **pkts, uint16_t n)
 }
 
 static __rte_always_inline void
-virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
+complete_async_pkts(struct vhost_dev *vdev)
+{
+	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+	uint16_t complete_count;
+
+	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
+	rte_atomic16_sub(&vdev->nr_async_pkts, complete_count);
+	if (complete_count)
+		free_pkts(p_cpl, complete_count);
+}
+
+static __rte_always_inline void
+sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
 {
 	uint16_t ret;
-	struct rte_mbuf *m_cpl[1];
 
 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
-	} else if (async_vhost_driver) {
-		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
-						&m, 1);
-
-		if (likely(ret))
-			dst_vdev->nr_async_pkts++;
-
-		while (likely(dst_vdev->nr_async_pkts)) {
-			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
-					VIRTIO_RXQ, m_cpl, 1))
-				dst_vdev->nr_async_pkts--;
-		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
 	}
@@ -850,25 +850,25 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 }
 
 static __rte_always_inline void
-drain_vhost(struct vhost_dev *dst_vdev, struct rte_mbuf **m, uint16_t nr_xmit)
+drain_vhost(struct vhost_dev *dst_vdev)
 {
-	uint16_t ret, nr_cpl;
-	struct rte_mbuf *m_cpl[MAX_PKT_BURST];
+	uint16_t ret;
+	uint16_t nr_xmit = vhost_m_table[dst_vdev->vid].len;
+	struct rte_mbuf **m = vhost_m_table[dst_vdev->vid].m_table;
 
 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, m, nr_xmit);
 	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr;
+		struct rte_mbuf *m_cpu_cpl[nr_xmit];
+		complete_async_pkts(dst_vdev);
+		while (rte_atomic16_read(&dst_vdev->nr_async_pkts) >= 128)
+			complete_async_pkts(dst_vdev);
+
 		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
-						m, nr_xmit);
-		dst_vdev->nr_async_pkts += ret;
+					m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+		rte_atomic16_add(&dst_vdev->nr_async_pkts, ret - cpu_cpl_nr);
 		free_pkts(&m[ret], nr_xmit - ret);
-
-		while (likely(dst_vdev->nr_async_pkts)) {
-			nr_cpl = rte_vhost_poll_enqueue_completed(dst_vdev->vid,
-					VIRTIO_RXQ, m_cpl, MAX_PKT_BURST);
-			dst_vdev->nr_async_pkts -= nr_cpl;
-			free_pkts(m_cpl, nr_cpl);
-		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
 						m, nr_xmit);
@@ -925,7 +925,7 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 	}
 
 	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
-		drain_vhost(dst_vdev, vhost_txq->m_table, MAX_PKT_BURST);
+		drain_vhost(dst_vdev);
 		vhost_txq->len = 0;
 		vhost_tsc[dst_vdev->vid] = rte_rdtsc();
 	}
@@ -1031,7 +1031,7 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 
 		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
 			if (vdev2 != vdev)
-				virtio_xmit(vdev2, vdev, m);
+				sync_virtio_xmit(vdev2, vdev, m);
 		}
 		goto queue2nic;
 	}
@@ -1124,31 +1124,17 @@ drain_mbuf_table(struct mbuf_table *tx_q)
 	}
 }
 
-static __rte_always_inline void
-complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
-{
-	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
-	uint16_t complete_count;
-
-	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
-						qid, p_cpl, MAX_PKT_BURST);
-	vdev->nr_async_pkts -= complete_count;
-	if (complete_count)
-		free_pkts(p_cpl, complete_count);
-}
-
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
 	uint16_t rx_count, enqueue_count;
+	uint32_t cpu_cpl_nr;
 	struct rte_mbuf *pkts[MAX_PKT_BURST];
+	struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
 
 	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
 				    pkts, MAX_PKT_BURST);
 
-	while (likely(vdev->nr_async_pkts))
-		complete_async_pkts(vdev, VIRTIO_RXQ);
-
 	if (!rx_count)
 		return;
 
@@ -1170,13 +1156,21 @@ drain_eth_rx(struct vhost_dev *vdev)
 		}
 	}
 
+	complete_async_pkts(vdev);
+	while (rte_atomic16_read(&vdev->nr_async_pkts) >= 128)
+		complete_async_pkts(vdev);
+
 	if (builtin_net_driver) {
 		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
 						pkts, rx_count);
 	} else if (async_vhost_driver) {
 		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
-					VIRTIO_RXQ, pkts, rx_count);
-		vdev->nr_async_pkts += enqueue_count;
+					VIRTIO_RXQ, pkts, rx_count,
+					m_cpu_cpl, &cpu_cpl_nr);
+		rte_atomic16_add(&vdev->nr_async_pkts,
+					enqueue_count - cpu_cpl_nr);
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
 	} else {
 		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
 						pkts, rx_count);
@@ -1224,7 +1218,7 @@ drain_virtio_tx(struct vhost_dev *vdev)
 			RTE_LOG_DP(DEBUG, VHOST_DATA,
 				"Vhost tX queue drained after timeout with burst size %u\n",
 				vhost_txq->len);
-			drain_vhost(vdev, vhost_txq->m_table, vhost_txq->len);
+			drain_vhost(vdev);
 			vhost_txq->len = 0;
 			vhost_tsc[vdev->vid] = cur_tsc;
 		}
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 4317b6ae81..d33ddb411b 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,7 +51,7 @@ struct vhost_dev {
 	uint64_t features;
 	size_t hdr_len;
 	uint16_t nr_vrings;
-	uint16_t nr_async_pkts;
+	rte_atomic16_t nr_async_pkts;
 	struct rte_vhost_memory *mem;
 	struct device_statistics stats;
 	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v2 0/2] examples/vhost: sample code refactor
  2020-12-18 11:33 [dpdk-dev] [PATCH v1 0/3] examples/vhost: sample code refactor Cheng Jiang
                   ` (2 preceding siblings ...)
  2020-12-18 11:33 ` [dpdk-dev] [PATCH v1 3/3] examples/vhost: refactor vhost async data path Cheng Jiang
@ 2020-12-22  8:36 ` Cheng Jiang
  2020-12-22  8:36   ` [dpdk-dev] [PATCH v2 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
  2020-12-22  8:36   ` [dpdk-dev] [PATCH v2 2/2] examples/vhost: refactor vhost data path Cheng Jiang
  2020-12-24  8:49 ` [dpdk-dev] [PATCH v3 0/2] examples/vhost: sample code refactor Cheng Jiang
                   ` (6 subsequent siblings)
  10 siblings, 2 replies; 44+ messages in thread
From: Cheng Jiang @ 2020-12-22  8:36 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Refactor the vhost sample code. Add ioat ring space count and check
in ioat callback, optimize vhost data path for batch enqueue, replace
rte_atomicNN_xxx with atomic_XXX and refactor vhost async data path.
---
v2:
 * optimized patch structure
 * optimized git log
 * replaced rte_atomicNN_xxx with atomic_XXX

Cheng Jiang (2):
  examples/vhost: add ioat ring space count and check
  examples/vhost: refactor vhost data path

 examples/vhost/ioat.c |  15 ++--
 examples/vhost/main.c | 163 +++++++++++++++++++++++++++++-------------
 examples/vhost/main.h |   7 +-
 3 files changed, 125 insertions(+), 60 deletions(-)

--
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v2 1/2] examples/vhost: add ioat ring space count and check
  2020-12-22  8:36 ` [dpdk-dev] [PATCH v2 0/2] examples/vhost: sample code refactor Cheng Jiang
@ 2020-12-22  8:36   ` Cheng Jiang
  2020-12-22  8:36   ` [dpdk-dev] [PATCH v2 2/2] examples/vhost: refactor vhost data path Cheng Jiang
  1 sibling, 0 replies; 44+ messages in thread
From: Cheng Jiang @ 2020-12-22  8:36 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Add ioat ring space count and check. If the ioat ring space is not
enough for the next async vhost packet enqueue, just return to prevent
enqueue failure.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/ioat.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 71d8a1f1f5..b0b04aa453 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -17,6 +17,7 @@ struct packet_tracker {
 	unsigned short next_read;
 	unsigned short next_write;
 	unsigned short last_remain;
+	unsigned short ioat_space;
 };
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
@@ -113,7 +114,7 @@ open_ioat(const char *value)
 			goto out;
 		}
 		rte_rawdev_start(dev_id);
-
+		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
 		dma_info->nr++;
 		i++;
 	}
@@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 			src = descs[i_desc].src;
 			dst = descs[i_desc].dst;
 			i_seg = 0;
+			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
+				break;
 			while (i_seg < src->nr_segs) {
-				/*
-				 * TODO: Assuming that the ring space of the
-				 * IOAT device is large enough, so there is no
-				 * error here, and the actual error handling
-				 * will be added later.
-				 */
 				rte_ioat_enqueue_copy(dev_id,
 					(uintptr_t)(src->iov[i_seg].iov_base)
 						+ src->offset,
@@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 				i_seg++;
 			}
 			write &= mask;
-			cb_tracker[dev_id].size_track[write] = i_seg;
+			cb_tracker[dev_id].size_track[write] = src->nr_segs;
+			cb_tracker[dev_id].ioat_space -= src->nr_segs;
 			write++;
 		}
 	} else {
@@ -186,6 +184,7 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 		int dev_id = dma_bind[vid].dmas[queue_id * 2
 				+ VIRTIO_RXQ].dev_id;
 		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
+		cb_tracker[dev_id].ioat_space += n_seg;
 		n_seg += cb_tracker[dev_id].last_remain;
 		if (!n_seg)
 			return 0;
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v2 2/2] examples/vhost: refactor vhost data path
  2020-12-22  8:36 ` [dpdk-dev] [PATCH v2 0/2] examples/vhost: sample code refactor Cheng Jiang
  2020-12-22  8:36   ` [dpdk-dev] [PATCH v2 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
@ 2020-12-22  8:36   ` Cheng Jiang
  1 sibling, 0 replies; 44+ messages in thread
From: Cheng Jiang @ 2020-12-22  8:36 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Change the vm2vm data path to batch enqueue for better performance.
Support the latest async vhost API, refactor the vhost async data path,
replace rte_atomicNN_xxx with atomic_XXX and clean up some code.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/main.c | 163 +++++++++++++++++++++++++++++-------------
 examples/vhost/main.h |   7 +-
 2 files changed, 118 insertions(+), 52 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 8d8c3038bf..d400939a2d 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -179,9 +179,18 @@ struct mbuf_table {
 	struct rte_mbuf *m_table[MAX_PKT_BURST];
 };
 
+struct vhost_bufftable {
+	uint32_t len;
+	uint64_t pre_tsc;
+	struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
 /* TX queue for each data core. */
 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
 
+/* TX queue for each vhost device. */
+struct vhost_bufftable vhost_bufftable[RTE_MAX_LCORE * MAX_VHOST_DEVICE];
+
 #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
 				 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN       4
@@ -804,39 +813,82 @@ unlink_vmdq(struct vhost_dev *vdev)
 	}
 }
 
+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+	while (n--)
+		rte_pktmbuf_free(pkts[n]);
+}
+
 static __rte_always_inline void
-virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
+complete_async_pkts(struct vhost_dev *vdev)
+{
+	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+	uint16_t complete_count;
+
+	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
+	if (complete_count) {
+		atomic_fetch_sub(&vdev->nr_async_pkts, complete_count);
+		free_pkts(p_cpl, complete_count);
+	}
+}
+
+static __rte_always_inline void
+sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
 {
 	uint16_t ret;
-	struct rte_mbuf *m_cpl[1];
 
 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
-	} else if (async_vhost_driver) {
-		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
-						&m, 1);
-
-		if (likely(ret))
-			dst_vdev->nr_async_pkts++;
-
-		while (likely(dst_vdev->nr_async_pkts)) {
-			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
-					VIRTIO_RXQ, m_cpl, 1))
-				dst_vdev->nr_async_pkts--;
-		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
 	}
 
 	if (enable_stats) {
-		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
-		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
+		atomic_fetch_add(&dst_vdev->stats.rx_total_atomic, 1);
+		atomic_fetch_add(&dst_vdev->stats.rx_atomic, ret);
 		src_vdev->stats.tx_total++;
 		src_vdev->stats.tx += ret;
 	}
 }
 
+static __rte_always_inline void
+drain_vhost(struct vhost_dev *vdev)
+{
+	uint16_t ret;
+	uint64_t queue_id = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
+	uint16_t nr_xmit = vhost_bufftable[queue_id].len;
+	struct rte_mbuf **m = vhost_bufftable[queue_id].m_table;
+
+	if (builtin_net_driver) {
+		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
+	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr;
+		struct rte_mbuf *m_cpu_cpl[nr_xmit];
+		complete_async_pkts(vdev);
+		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+					m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts, ret - cpu_cpl_nr);
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+		if (nr_xmit - ret)
+			free_pkts(&m[ret], nr_xmit - ret);
+	} else {
+		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+						m, nr_xmit);
+	}
+
+	if (enable_stats) {
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, nr_xmit);
+		atomic_fetch_add(&vdev->stats.rx_atomic, ret);
+	}
+
+	if (!async_vhost_driver)
+		free_pkts(m, nr_xmit);
+}
+
 /*
  * Check if the packet destination MAC address is for a local device. If so then put
  * the packet on that devices RX queue. If not then return.
@@ -846,7 +898,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
 	struct rte_ether_hdr *pkt_hdr;
 	struct vhost_dev *dst_vdev;
-
+	struct vhost_bufftable *vhost_txq;
+	const uint16_t lcore_id = rte_lcore_id();
 	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
 	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
@@ -869,7 +922,20 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 		return 0;
 	}
 
-	virtio_xmit(dst_vdev, vdev, m);
+	vhost_txq = &vhost_bufftable[lcore_id * MAX_VHOST_DEVICE
+					+ dst_vdev->vid];
+	vhost_txq->m_table[vhost_txq->len++] = m;
+
+	if (enable_stats) {
+		vdev->stats.tx_total++;
+		vdev->stats.tx++;
+	}
+
+	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
+		drain_vhost(dst_vdev);
+		vhost_txq->len = 0;
+		vhost_txq->pre_tsc = rte_rdtsc();
+	}
 	return 0;
 }
 
@@ -940,13 +1006,6 @@ static void virtio_tx_offload(struct rte_mbuf *m)
 	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
 }
 
-static inline void
-free_pkts(struct rte_mbuf **pkts, uint16_t n)
-{
-	while (n--)
-		rte_pktmbuf_free(pkts[n]);
-}
-
 static __rte_always_inline void
 do_drain_mbuf_table(struct mbuf_table *tx_q)
 {
@@ -979,14 +1038,13 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 
 		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
 			if (vdev2 != vdev)
-				virtio_xmit(vdev2, vdev, m);
+				sync_virtio_xmit(vdev2, vdev, m);
 		}
 		goto queue2nic;
 	}
 
 	/*check if destination is local VM*/
 	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
-		rte_pktmbuf_free(m);
 		return;
 	}
 
@@ -1073,19 +1131,6 @@ drain_mbuf_table(struct mbuf_table *tx_q)
 	}
 }
 
-static __rte_always_inline void
-complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
-{
-	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
-	uint16_t complete_count;
-
-	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
-						qid, p_cpl, MAX_PKT_BURST);
-	vdev->nr_async_pkts -= complete_count;
-	if (complete_count)
-		free_pkts(p_cpl, complete_count);
-}
-
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
@@ -1095,9 +1140,6 @@ drain_eth_rx(struct vhost_dev *vdev)
 	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
 				    pkts, MAX_PKT_BURST);
 
-	while (likely(vdev->nr_async_pkts))
-		complete_async_pkts(vdev, VIRTIO_RXQ);
-
 	if (!rx_count)
 		return;
 
@@ -1123,17 +1165,26 @@ drain_eth_rx(struct vhost_dev *vdev)
 		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
 						pkts, rx_count);
 	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr;
+		struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
+		complete_async_pkts(vdev);
 		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
-					VIRTIO_RXQ, pkts, rx_count);
-		vdev->nr_async_pkts += enqueue_count;
+					VIRTIO_RXQ, pkts, rx_count,
+					m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts,
+					enqueue_count - cpu_cpl_nr);
+		if (enqueue_count > cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+		free_pkts(&pkts[enqueue_count], rx_count - enqueue_count);
+
 	} else {
 		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
 						pkts, rx_count);
 	}
 
 	if (enable_stats) {
-		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
-		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, rx_count);
+		atomic_fetch_add(&vdev->stats.rx_atomic, enqueue_count);
 	}
 
 	if (!async_vhost_driver)
@@ -1144,8 +1195,11 @@ static __rte_always_inline void
 drain_virtio_tx(struct vhost_dev *vdev)
 {
 	struct rte_mbuf *pkts[MAX_PKT_BURST];
+	struct vhost_bufftable *vhost_txq;
+	const uint16_t lcore_id = rte_lcore_id();
 	uint16_t count;
 	uint16_t i;
+	uint64_t cur_tsc;
 
 	if (builtin_net_driver) {
 		count = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool,
@@ -1163,6 +1217,17 @@ drain_virtio_tx(struct vhost_dev *vdev)
 
 	for (i = 0; i < count; ++i)
 		virtio_tx_route(vdev, pkts[i], vlan_tags[vdev->vid]);
+
+	vhost_txq = &vhost_bufftable[lcore_id * MAX_VHOST_DEVICE + vdev->vid];
+	cur_tsc = rte_rdtsc();
+	if (unlikely(cur_tsc - vhost_txq->pre_tsc > MBUF_TABLE_DRAIN_TSC)) {
+		RTE_LOG_DP(DEBUG, VHOST_DATA,
+			"Vhost tX queue drained after timeout with burst size %u\n",
+			vhost_txq->len);
+		drain_vhost(vdev);
+		vhost_txq->len = 0;
+		vhost_txq->pre_tsc = cur_tsc;
+	}
 }
 
 /*
@@ -1392,8 +1457,8 @@ print_stats(__rte_unused void *arg)
 			tx         = vdev->stats.tx;
 			tx_dropped = tx_total - tx;
 
-			rx_total   = rte_atomic64_read(&vdev->stats.rx_total_atomic);
-			rx         = rte_atomic64_read(&vdev->stats.rx_atomic);
+			rx_total   = atomic_load(&vdev->stats.rx_total_atomic);
+			rx         = atomic_load(&vdev->stats.rx_atomic);
 			rx_dropped = rx_total - rx;
 
 			printf("Statistics for device %d\n"
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 4317b6ae81..6aa798a3e2 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -8,6 +8,7 @@
 #include <sys/queue.h>
 
 #include <rte_ether.h>
+#include <stdatomic.h>
 
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
@@ -21,8 +22,8 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 struct device_statistics {
 	uint64_t	tx;
 	uint64_t	tx_total;
-	rte_atomic64_t	rx_atomic;
-	rte_atomic64_t	rx_total_atomic;
+	atomic_int_least64_t	rx_atomic;
+	atomic_int_least64_t	rx_total_atomic;
 };
 
 struct vhost_queue {
@@ -51,7 +52,7 @@ struct vhost_dev {
 	uint64_t features;
 	size_t hdr_len;
 	uint16_t nr_vrings;
-	uint16_t nr_async_pkts;
+	atomic_int_least16_t nr_async_pkts;
 	struct rte_vhost_memory *mem;
 	struct device_statistics stats;
 	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v3 0/2] examples/vhost: sample code refactor
  2020-12-18 11:33 [dpdk-dev] [PATCH v1 0/3] examples/vhost: sample code refactor Cheng Jiang
                   ` (3 preceding siblings ...)
  2020-12-22  8:36 ` [dpdk-dev] [PATCH v2 0/2] examples/vhost: sample code refactor Cheng Jiang
@ 2020-12-24  8:49 ` Cheng Jiang
  2020-12-24  8:49   ` [dpdk-dev] [PATCH v3 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
  2020-12-24  8:49   ` [dpdk-dev] [PATCH v3 2/2] examples/vhost: refactor vhost data path Cheng Jiang
  2020-12-25  8:07 ` [dpdk-dev] [PATCH v4 0/2] examples/vhost: sample code refactor Cheng Jiang
                   ` (5 subsequent siblings)
  10 siblings, 2 replies; 44+ messages in thread
From: Cheng Jiang @ 2020-12-24  8:49 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Refactor the vhost sample code. Add ioat ring space count and check
in ioat callback, optimize vhost data path for batch enqueue, replace
rte_atomicNN_xxx with atomic_XXX and refactor vhost async data path.
---
v3:
 * added some variable initialization
 * cleaned up some code

v2:
 * optimized patch structure
 * optimized git log
 * replaced rte_atomicNN_xxx with atomic_XXX

Cheng Jiang (2):
  examples/vhost: add ioat ring space count and check
  examples/vhost: refactor vhost data path

 examples/vhost/ioat.c |  15 ++--
 examples/vhost/main.c | 168 ++++++++++++++++++++++++++++++------------
 examples/vhost/main.h |   7 +-
 3 files changed, 130 insertions(+), 60 deletions(-)

--
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v3 1/2] examples/vhost: add ioat ring space count and check
  2020-12-24  8:49 ` [dpdk-dev] [PATCH v3 0/2] examples/vhost: sample code refactor Cheng Jiang
@ 2020-12-24  8:49   ` Cheng Jiang
  2020-12-24  8:49   ` [dpdk-dev] [PATCH v3 2/2] examples/vhost: refactor vhost data path Cheng Jiang
  1 sibling, 0 replies; 44+ messages in thread
From: Cheng Jiang @ 2020-12-24  8:49 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Add ioat ring space count and check, if ioat ring space is not enough
for the next async vhost packet enqueue, then just return to prevent
enqueue failure.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/ioat.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 71d8a1f1f..b0b04aa45 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -17,6 +17,7 @@ struct packet_tracker {
 	unsigned short next_read;
 	unsigned short next_write;
 	unsigned short last_remain;
+	unsigned short ioat_space;
 };
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
@@ -113,7 +114,7 @@ open_ioat(const char *value)
 			goto out;
 		}
 		rte_rawdev_start(dev_id);
-
+		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
 		dma_info->nr++;
 		i++;
 	}
@@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 			src = descs[i_desc].src;
 			dst = descs[i_desc].dst;
 			i_seg = 0;
+			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
+				break;
 			while (i_seg < src->nr_segs) {
-				/*
-				 * TODO: Assuming that the ring space of the
-				 * IOAT device is large enough, so there is no
-				 * error here, and the actual error handling
-				 * will be added later.
-				 */
 				rte_ioat_enqueue_copy(dev_id,
 					(uintptr_t)(src->iov[i_seg].iov_base)
 						+ src->offset,
@@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 				i_seg++;
 			}
 			write &= mask;
-			cb_tracker[dev_id].size_track[write] = i_seg;
+			cb_tracker[dev_id].size_track[write] = src->nr_segs;
+			cb_tracker[dev_id].ioat_space -= src->nr_segs;
 			write++;
 		}
 	} else {
@@ -186,6 +184,7 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 		int dev_id = dma_bind[vid].dmas[queue_id * 2
 				+ VIRTIO_RXQ].dev_id;
 		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
+		cb_tracker[dev_id].ioat_space += n_seg;
 		n_seg += cb_tracker[dev_id].last_remain;
 		if (!n_seg)
 			return 0;
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v3 2/2] examples/vhost: refactor vhost data path
  2020-12-24  8:49 ` [dpdk-dev] [PATCH v3 0/2] examples/vhost: sample code refactor Cheng Jiang
  2020-12-24  8:49   ` [dpdk-dev] [PATCH v3 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
@ 2020-12-24  8:49   ` Cheng Jiang
  1 sibling, 0 replies; 44+ messages in thread
From: Cheng Jiang @ 2020-12-24  8:49 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Change the vm2vm data path to batch enqueue for better performance.
Support the latest async vhost API, refactor the vhost async data path,
replace rte_atomicNN_xxx with atomic_XXX and clean up some code.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/main.c | 168 ++++++++++++++++++++++++++++++------------
 examples/vhost/main.h |   7 +-
 2 files changed, 123 insertions(+), 52 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 8d8c3038b..efc044b28 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -179,9 +179,18 @@ struct mbuf_table {
 	struct rte_mbuf *m_table[MAX_PKT_BURST];
 };
 
+struct vhost_bufftable {
+	uint32_t len;
+	uint64_t pre_tsc;
+	struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
 /* TX queue for each data core. */
 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
 
+/* TX queue for each vhost device. */
+struct vhost_bufftable vhost_bufftable[RTE_MAX_LCORE * MAX_VHOST_DEVICE];
+
 #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
 				 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN       4
@@ -804,39 +813,84 @@ unlink_vmdq(struct vhost_dev *vdev)
 	}
 }
 
+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+	while (n--)
+		rte_pktmbuf_free(pkts[n]);
+}
+
 static __rte_always_inline void
-virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
+complete_async_pkts(struct vhost_dev *vdev)
+{
+	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+	uint16_t complete_count;
+
+	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
+	if (complete_count) {
+		atomic_fetch_sub(&vdev->nr_async_pkts, complete_count);
+		free_pkts(p_cpl, complete_count);
+	}
+}
+
+static __rte_always_inline void
+sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
 {
 	uint16_t ret;
-	struct rte_mbuf *m_cpl[1];
 
 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
-	} else if (async_vhost_driver) {
-		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
-						&m, 1);
-
-		if (likely(ret))
-			dst_vdev->nr_async_pkts++;
-
-		while (likely(dst_vdev->nr_async_pkts)) {
-			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
-					VIRTIO_RXQ, m_cpl, 1))
-				dst_vdev->nr_async_pkts--;
-		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
 	}
 
 	if (enable_stats) {
-		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
-		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
+		atomic_fetch_add(&dst_vdev->stats.rx_total_atomic, 1);
+		atomic_fetch_add(&dst_vdev->stats.rx_atomic, ret);
 		src_vdev->stats.tx_total++;
 		src_vdev->stats.tx += ret;
 	}
 }
 
+static __rte_always_inline void
+drain_vhost(struct vhost_dev *vdev)
+{
+	uint16_t ret;
+	uint64_t queue_id = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
+	uint16_t nr_xmit = vhost_bufftable[queue_id].len;
+	struct rte_mbuf **m = vhost_bufftable[queue_id].m_table;
+
+	if (builtin_net_driver) {
+		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
+	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[nr_xmit];
+		complete_async_pkts(vdev);
+		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+					m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts, ret - cpu_cpl_nr);
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+		enqueue_fail = nr_xmit - ret;
+		if (enqueue_fail)
+			free_pkts(&m[ret], nr_xmit - ret);
+	} else {
+		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+						m, nr_xmit);
+	}
+
+	if (enable_stats) {
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, nr_xmit);
+		atomic_fetch_add(&vdev->stats.rx_atomic, ret);
+	}
+
+	if (!async_vhost_driver)
+		free_pkts(m, nr_xmit);
+}
+
 /*
  * Check if the packet destination MAC address is for a local device. If so then put
  * the packet on that devices RX queue. If not then return.
@@ -846,7 +900,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
 	struct rte_ether_hdr *pkt_hdr;
 	struct vhost_dev *dst_vdev;
-
+	struct vhost_bufftable *vhost_txq;
+	const uint16_t lcore_id = rte_lcore_id();
 	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
 	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
@@ -869,7 +924,20 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 		return 0;
 	}
 
-	virtio_xmit(dst_vdev, vdev, m);
+	vhost_txq = &vhost_bufftable[lcore_id * MAX_VHOST_DEVICE
+					+ dst_vdev->vid];
+	vhost_txq->m_table[vhost_txq->len++] = m;
+
+	if (enable_stats) {
+		vdev->stats.tx_total++;
+		vdev->stats.tx++;
+	}
+
+	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
+		drain_vhost(dst_vdev);
+		vhost_txq->len = 0;
+		vhost_txq->pre_tsc = rte_rdtsc();
+	}
 	return 0;
 }
 
@@ -940,13 +1008,6 @@ static void virtio_tx_offload(struct rte_mbuf *m)
 	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
 }
 
-static inline void
-free_pkts(struct rte_mbuf **pkts, uint16_t n)
-{
-	while (n--)
-		rte_pktmbuf_free(pkts[n]);
-}
-
 static __rte_always_inline void
 do_drain_mbuf_table(struct mbuf_table *tx_q)
 {
@@ -979,14 +1040,13 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 
 		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
 			if (vdev2 != vdev)
-				virtio_xmit(vdev2, vdev, m);
+				sync_virtio_xmit(vdev2, vdev, m);
 		}
 		goto queue2nic;
 	}
 
 	/*check if destination is local VM*/
 	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
-		rte_pktmbuf_free(m);
 		return;
 	}
 
@@ -1073,19 +1133,6 @@ drain_mbuf_table(struct mbuf_table *tx_q)
 	}
 }
 
-static __rte_always_inline void
-complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
-{
-	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
-	uint16_t complete_count;
-
-	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
-						qid, p_cpl, MAX_PKT_BURST);
-	vdev->nr_async_pkts -= complete_count;
-	if (complete_count)
-		free_pkts(p_cpl, complete_count);
-}
-
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
@@ -1095,9 +1142,6 @@ drain_eth_rx(struct vhost_dev *vdev)
 	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
 				    pkts, MAX_PKT_BURST);
 
-	while (likely(vdev->nr_async_pkts))
-		complete_async_pkts(vdev, VIRTIO_RXQ);
-
 	if (!rx_count)
 		return;
 
@@ -1123,17 +1167,29 @@ drain_eth_rx(struct vhost_dev *vdev)
 		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
 						pkts, rx_count);
 	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
+		complete_async_pkts(vdev);
 		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
-					VIRTIO_RXQ, pkts, rx_count);
-		vdev->nr_async_pkts += enqueue_count;
+					VIRTIO_RXQ, pkts, rx_count,
+					m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts,
+					enqueue_count - cpu_cpl_nr);
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+		enqueue_fail = rx_count - enqueue_count;
+		if (enqueue_fail)
+			free_pkts(&pkts[enqueue_count], enqueue_fail);
+
 	} else {
 		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
 						pkts, rx_count);
 	}
 
 	if (enable_stats) {
-		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
-		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, rx_count);
+		atomic_fetch_add(&vdev->stats.rx_atomic, enqueue_count);
 	}
 
 	if (!async_vhost_driver)
@@ -1144,8 +1200,11 @@ static __rte_always_inline void
 drain_virtio_tx(struct vhost_dev *vdev)
 {
 	struct rte_mbuf *pkts[MAX_PKT_BURST];
+	struct vhost_bufftable *vhost_txq;
+	const uint16_t lcore_id = rte_lcore_id();
 	uint16_t count;
 	uint16_t i;
+	uint64_t cur_tsc;
 
 	if (builtin_net_driver) {
 		count = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool,
@@ -1163,6 +1222,17 @@ drain_virtio_tx(struct vhost_dev *vdev)
 
 	for (i = 0; i < count; ++i)
 		virtio_tx_route(vdev, pkts[i], vlan_tags[vdev->vid]);
+
+	vhost_txq = &vhost_bufftable[lcore_id * MAX_VHOST_DEVICE + vdev->vid];
+	cur_tsc = rte_rdtsc();
+	if (unlikely(cur_tsc - vhost_txq->pre_tsc > MBUF_TABLE_DRAIN_TSC)) {
+		RTE_LOG_DP(DEBUG, VHOST_DATA,
+			"Vhost tX queue drained after timeout with burst size %u\n",
+			vhost_txq->len);
+		drain_vhost(vdev);
+		vhost_txq->len = 0;
+		vhost_txq->pre_tsc = cur_tsc;
+	}
 }
 
 /*
@@ -1392,8 +1462,8 @@ print_stats(__rte_unused void *arg)
 			tx         = vdev->stats.tx;
 			tx_dropped = tx_total - tx;
 
-			rx_total   = rte_atomic64_read(&vdev->stats.rx_total_atomic);
-			rx         = rte_atomic64_read(&vdev->stats.rx_atomic);
+			rx_total   = atomic_load(&vdev->stats.rx_total_atomic);
+			rx         = atomic_load(&vdev->stats.rx_atomic);
 			rx_dropped = rx_total - rx;
 
 			printf("Statistics for device %d\n"
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 4317b6ae8..6aa798a3e 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -8,6 +8,7 @@
 #include <sys/queue.h>
 
 #include <rte_ether.h>
+#include <stdatomic.h>
 
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
@@ -21,8 +22,8 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 struct device_statistics {
 	uint64_t	tx;
 	uint64_t	tx_total;
-	rte_atomic64_t	rx_atomic;
-	rte_atomic64_t	rx_total_atomic;
+	atomic_int_least64_t	rx_atomic;
+	atomic_int_least64_t	rx_total_atomic;
 };
 
 struct vhost_queue {
@@ -51,7 +52,7 @@ struct vhost_dev {
 	uint64_t features;
 	size_t hdr_len;
 	uint16_t nr_vrings;
-	uint16_t nr_async_pkts;
+	atomic_int_least16_t nr_async_pkts;
 	struct rte_vhost_memory *mem;
 	struct device_statistics stats;
 	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v4 0/2] examples/vhost: sample code refactor
  2020-12-18 11:33 [dpdk-dev] [PATCH v1 0/3] examples/vhost: sample code refactor Cheng Jiang
                   ` (4 preceding siblings ...)
  2020-12-24  8:49 ` [dpdk-dev] [PATCH v3 0/2] examples/vhost: sample code refactor Cheng Jiang
@ 2020-12-25  8:07 ` Cheng Jiang
  2020-12-25  8:07   ` [dpdk-dev] [PATCH v4 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
  2020-12-25  8:07   ` [dpdk-dev] [PATCH v4 2/2] examples/vhost: refactor vhost data path Cheng Jiang
  2020-12-28  7:16 ` [dpdk-dev] [PATCH v5 0/2] examples/vhost: sample code refactor Cheng Jiang
                   ` (4 subsequent siblings)
  10 siblings, 2 replies; 44+ messages in thread
From: Cheng Jiang @ 2020-12-25  8:07 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Refactor the vhost sample code. Add ioat ring space count and check
in ioat callback, optimize vhost data path for batch enqueue, replace
rte_atomicNN_xxx with atomic_XXX and refactor vhost async data path.
---
v4:
 * improved code structure
 * improved vhost enqueue buffer memory allocation
 * cleaned up some code

v3:
 * added some variable initialization
 * cleaned up some code

v2:
 * optimized patch structure
 * optimized git log
 * replaced rte_atomicNN_xxx with atomic_XXX

Cheng Jiang (2):
  examples/vhost: add ioat ring space count and check
  examples/vhost: refactor vhost data path

 examples/vhost/ioat.c |  15 ++--
 examples/vhost/main.c | 202 +++++++++++++++++++++++++++++++-----------
 examples/vhost/main.h |   7 +-
 3 files changed, 161 insertions(+), 63 deletions(-)

--
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v4 1/2] examples/vhost: add ioat ring space count and check
  2020-12-25  8:07 ` [dpdk-dev] [PATCH v4 0/2] examples/vhost: sample code refactor Cheng Jiang
@ 2020-12-25  8:07   ` Cheng Jiang
  2020-12-28  2:50     ` Hu, Jiayu
  2020-12-25  8:07   ` [dpdk-dev] [PATCH v4 2/2] examples/vhost: refactor vhost data path Cheng Jiang
  1 sibling, 1 reply; 44+ messages in thread
From: Cheng Jiang @ 2020-12-25  8:07 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Add ioat ring space count and check, if ioat ring space is not enough
for the next async vhost packet enqueue, then just return to prevent
enqueue failure.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/ioat.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 71d8a1f1f..b0b04aa45 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -17,6 +17,7 @@ struct packet_tracker {
 	unsigned short next_read;
 	unsigned short next_write;
 	unsigned short last_remain;
+	unsigned short ioat_space;
 };
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
@@ -113,7 +114,7 @@ open_ioat(const char *value)
 			goto out;
 		}
 		rte_rawdev_start(dev_id);
-
+		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
 		dma_info->nr++;
 		i++;
 	}
@@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 			src = descs[i_desc].src;
 			dst = descs[i_desc].dst;
 			i_seg = 0;
+			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
+				break;
 			while (i_seg < src->nr_segs) {
-				/*
-				 * TODO: Assuming that the ring space of the
-				 * IOAT device is large enough, so there is no
-				 * error here, and the actual error handling
-				 * will be added later.
-				 */
 				rte_ioat_enqueue_copy(dev_id,
 					(uintptr_t)(src->iov[i_seg].iov_base)
 						+ src->offset,
@@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 				i_seg++;
 			}
 			write &= mask;
-			cb_tracker[dev_id].size_track[write] = i_seg;
+			cb_tracker[dev_id].size_track[write] = src->nr_segs;
+			cb_tracker[dev_id].ioat_space -= src->nr_segs;
 			write++;
 		}
 	} else {
@@ -186,6 +184,7 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 		int dev_id = dma_bind[vid].dmas[queue_id * 2
 				+ VIRTIO_RXQ].dev_id;
 		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
+		cb_tracker[dev_id].ioat_space += n_seg;
 		n_seg += cb_tracker[dev_id].last_remain;
 		if (!n_seg)
 			return 0;
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v4 2/2] examples/vhost: refactor vhost data path
  2020-12-25  8:07 ` [dpdk-dev] [PATCH v4 0/2] examples/vhost: sample code refactor Cheng Jiang
  2020-12-25  8:07   ` [dpdk-dev] [PATCH v4 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
@ 2020-12-25  8:07   ` Cheng Jiang
  2020-12-28  4:03     ` Hu, Jiayu
  1 sibling, 1 reply; 44+ messages in thread
From: Cheng Jiang @ 2020-12-25  8:07 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Change the vm2vm data path to batch enqueue for better performance.
Support latest async vhost API, refactor vhost async data path,
replase rte_atomicNN_xxx to atomic_XXX and clean some codes.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/main.c | 202 +++++++++++++++++++++++++++++++-----------
 examples/vhost/main.h |   7 +-
 2 files changed, 154 insertions(+), 55 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 8d8c3038b..3ea12a474 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -179,9 +179,18 @@ struct mbuf_table {
 	struct rte_mbuf *m_table[MAX_PKT_BURST];
 };
 
+struct vhost_bufftable {
+	uint32_t len;
+	uint64_t pre_tsc;
+	struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
 /* TX queue for each data core. */
 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
 
+/* TX queue for each vhost device. */
+struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * MAX_VHOST_DEVICE];
+
 #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
 				 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN       4
@@ -804,39 +813,114 @@ unlink_vmdq(struct vhost_dev *vdev)
 	}
 }
 
+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+	while (n--)
+		rte_pktmbuf_free(pkts[n]);
+}
+
 static __rte_always_inline void
-virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
+complete_async_pkts(struct vhost_dev *vdev)
+{
+	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+	uint16_t complete_count;
+
+	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
+	if (complete_count) {
+		atomic_fetch_sub(&vdev->nr_async_pkts, complete_count);
+		free_pkts(p_cpl, complete_count);
+	}
+}
+
+static __rte_always_inline void
+sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
 {
 	uint16_t ret;
-	struct rte_mbuf *m_cpl[1];
 
 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
-	} else if (async_vhost_driver) {
-		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
-						&m, 1);
-
-		if (likely(ret))
-			dst_vdev->nr_async_pkts++;
-
-		while (likely(dst_vdev->nr_async_pkts)) {
-			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
-					VIRTIO_RXQ, m_cpl, 1))
-				dst_vdev->nr_async_pkts--;
-		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
 	}
 
 	if (enable_stats) {
-		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
-		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
+		atomic_fetch_add(&dst_vdev->stats.rx_total_atomic, 1);
+		atomic_fetch_add(&dst_vdev->stats.rx_atomic, ret);
 		src_vdev->stats.tx_total++;
 		src_vdev->stats.tx += ret;
 	}
 }
 
+static __rte_always_inline void
+drain_vhost(struct vhost_dev *vdev)
+{
+	uint16_t ret;
+	uint64_t queue_id = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
+	uint16_t nr_xmit = vhost_txbuff[queue_id]->len;
+	struct rte_mbuf **m = vhost_txbuff[queue_id]->m_table;
+
+	if (builtin_net_driver) {
+		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
+	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[nr_xmit];
+
+		complete_async_pkts(vdev);
+		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+					m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts, ret - cpu_cpl_nr);
+
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+
+		enqueue_fail = nr_xmit - ret;
+		if (enqueue_fail)
+			free_pkts(&m[ret], nr_xmit - ret);
+	} else {
+		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+						m, nr_xmit);
+	}
+
+	if (enable_stats) {
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, nr_xmit);
+		atomic_fetch_add(&vdev->stats.rx_atomic, ret);
+	}
+
+	if (!async_vhost_driver)
+		free_pkts(m, nr_xmit);
+}
+
+static __rte_always_inline void
+drain_vhost_table(void)
+{
+	const uint16_t lcore_id = rte_lcore_id();
+	struct vhost_bufftable *vhost_txq;
+	struct vhost_dev *vdev;
+	uint64_t cur_tsc;
+
+	TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list, lcore_vdev_entry) {
+		if (!vdev->remove) {
+			vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE
+					+ vdev->vid];
+			cur_tsc = rte_rdtsc();
+
+			if (unlikely(cur_tsc - vhost_txq->pre_tsc
+					> MBUF_TABLE_DRAIN_TSC)) {
+				RTE_LOG_DP(DEBUG, VHOST_DATA,
+					"Vhost tX queue drained after timeout with burst size %u\n",
+					vhost_txq->len);
+				drain_vhost(vdev);
+				vhost_txq->len = 0;
+				vhost_txq->pre_tsc = cur_tsc;
+			}
+		}
+	}
+}
+
 /*
  * Check if the packet destination MAC address is for a local device. If so then put
  * the packet on that devices RX queue. If not then return.
@@ -846,7 +930,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
 	struct rte_ether_hdr *pkt_hdr;
 	struct vhost_dev *dst_vdev;
-
+	struct vhost_bufftable *vhost_txq;
+	const uint16_t lcore_id = rte_lcore_id();
 	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
 	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
@@ -869,7 +954,19 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 		return 0;
 	}
 
-	virtio_xmit(dst_vdev, vdev, m);
+	vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE + dst_vdev->vid];
+	vhost_txq->m_table[vhost_txq->len++] = m;
+
+	if (enable_stats) {
+		vdev->stats.tx_total++;
+		vdev->stats.tx++;
+	}
+
+	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
+		drain_vhost(dst_vdev);
+		vhost_txq->len = 0;
+		vhost_txq->pre_tsc = rte_rdtsc();
+	}
 	return 0;
 }
 
@@ -940,13 +1037,6 @@ static void virtio_tx_offload(struct rte_mbuf *m)
 	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
 }
 
-static inline void
-free_pkts(struct rte_mbuf **pkts, uint16_t n)
-{
-	while (n--)
-		rte_pktmbuf_free(pkts[n]);
-}
-
 static __rte_always_inline void
 do_drain_mbuf_table(struct mbuf_table *tx_q)
 {
@@ -979,16 +1069,14 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 
 		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
 			if (vdev2 != vdev)
-				virtio_xmit(vdev2, vdev, m);
+				sync_virtio_xmit(vdev2, vdev, m);
 		}
 		goto queue2nic;
 	}
 
 	/*check if destination is local VM*/
-	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
-		rte_pktmbuf_free(m);
+	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0))
 		return;
-	}
 
 	if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
 		if (unlikely(find_local_dest(vdev, m, &offset,
@@ -1073,19 +1161,6 @@ drain_mbuf_table(struct mbuf_table *tx_q)
 	}
 }
 
-static __rte_always_inline void
-complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
-{
-	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
-	uint16_t complete_count;
-
-	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
-						qid, p_cpl, MAX_PKT_BURST);
-	vdev->nr_async_pkts -= complete_count;
-	if (complete_count)
-		free_pkts(p_cpl, complete_count);
-}
-
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
@@ -1095,9 +1170,6 @@ drain_eth_rx(struct vhost_dev *vdev)
 	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
 				    pkts, MAX_PKT_BURST);
 
-	while (likely(vdev->nr_async_pkts))
-		complete_async_pkts(vdev, VIRTIO_RXQ);
-
 	if (!rx_count)
 		return;
 
@@ -1123,17 +1195,31 @@ drain_eth_rx(struct vhost_dev *vdev)
 		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
 						pkts, rx_count);
 	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
+
+		complete_async_pkts(vdev);
 		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
-					VIRTIO_RXQ, pkts, rx_count);
-		vdev->nr_async_pkts += enqueue_count;
+					VIRTIO_RXQ, pkts, rx_count,
+					m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts,
+					enqueue_count - cpu_cpl_nr);
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+
+		enqueue_fail = rx_count - enqueue_count;
+		if (enqueue_fail)
+			free_pkts(&pkts[enqueue_count], enqueue_fail);
+
 	} else {
 		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
 						pkts, rx_count);
 	}
 
 	if (enable_stats) {
-		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
-		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, rx_count);
+		atomic_fetch_add(&vdev->stats.rx_atomic, enqueue_count);
 	}
 
 	if (!async_vhost_driver)
@@ -1202,7 +1288,7 @@ switch_worker(void *arg __rte_unused)
 
 	while(1) {
 		drain_mbuf_table(tx_q);
-
+		drain_vhost_table();
 		/*
 		 * Inform the configuration core that we have exited the
 		 * linked list and that no devices are in use if requested.
@@ -1298,6 +1384,7 @@ static int
 new_device(int vid)
 {
 	int lcore, core_add = 0;
+	uint16_t i;
 	uint32_t device_num_min = num_devices;
 	struct vhost_dev *vdev;
 	vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
@@ -1309,6 +1396,13 @@ new_device(int vid)
 	}
 	vdev->vid = vid;
 
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		vhost_txbuff[i * MAX_VHOST_DEVICE + vid]
+			= rte_zmalloc("vhost bufftable",
+				sizeof(struct vhost_bufftable),
+				RTE_CACHE_LINE_SIZE);
+	}
+
 	if (builtin_net_driver)
 		vs_vhost_net_setup(vdev);
 
@@ -1343,12 +1437,15 @@ new_device(int vid)
 	if (async_vhost_driver) {
 		struct rte_vhost_async_features f;
 		struct rte_vhost_async_channel_ops channel_ops;
+
 		if (strncmp(dma_type, "ioat", 4) == 0) {
 			channel_ops.transfer_data = ioat_transfer_data_cb;
 			channel_ops.check_completed_copies =
 				ioat_check_completed_copies_cb;
+
 			f.async_inorder = 1;
 			f.async_threshold = 256;
+
 			return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
 				f.intval, &channel_ops);
 		}
@@ -1392,8 +1489,8 @@ print_stats(__rte_unused void *arg)
 			tx         = vdev->stats.tx;
 			tx_dropped = tx_total - tx;
 
-			rx_total   = rte_atomic64_read(&vdev->stats.rx_total_atomic);
-			rx         = rte_atomic64_read(&vdev->stats.rx_atomic);
+			rx_total   = atomic_load(&vdev->stats.rx_total_atomic);
+			rx         = atomic_load(&vdev->stats.rx_atomic);
 			rx_dropped = rx_total - rx;
 
 			printf("Statistics for device %d\n"
@@ -1592,6 +1689,7 @@ main(int argc, char *argv[])
 	/* Register vhost user driver to handle vhost messages. */
 	for (i = 0; i < nb_sockets; i++) {
 		char *file = socket_files + i * PATH_MAX;
+
 		if (async_vhost_driver)
 			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
 
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 4317b6ae8..6aa798a3e 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -8,6 +8,7 @@
 #include <sys/queue.h>
 
 #include <rte_ether.h>
+#include <stdatomic.h>
 
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
@@ -21,8 +22,8 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 struct device_statistics {
 	uint64_t	tx;
 	uint64_t	tx_total;
-	rte_atomic64_t	rx_atomic;
-	rte_atomic64_t	rx_total_atomic;
+	atomic_int_least64_t	rx_atomic;
+	atomic_int_least64_t	rx_total_atomic;
 };
 
 struct vhost_queue {
@@ -51,7 +52,7 @@ struct vhost_dev {
 	uint64_t features;
 	size_t hdr_len;
 	uint16_t nr_vrings;
-	uint16_t nr_async_pkts;
+	atomic_int_least16_t nr_async_pkts;
 	struct rte_vhost_memory *mem;
 	struct device_statistics stats;
 	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/2] examples/vhost: add ioat ring space count and check
  2020-12-25  8:07   ` [dpdk-dev] [PATCH v4 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
@ 2020-12-28  2:50     ` Hu, Jiayu
  2020-12-28  8:08       ` Jiang, Cheng1
  0 siblings, 1 reply; 44+ messages in thread
From: Hu, Jiayu @ 2020-12-28  2:50 UTC (permalink / raw)
  To: Jiang, Cheng1, maxime.coquelin, Xia, Chenbo; +Cc: dev, Yang, YvonneX

Hi Cheng,

> -----Original Message-----
> From: Jiang, Cheng1 <cheng1.jiang@intel.com>
> Sent: Friday, December 25, 2020 4:07 PM
> To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> <yvonnex.yang@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>
> Subject: [PATCH v4 1/2] examples/vhost: add ioat ring space count and check
> 
> Add ioat ring space count and check, if ioat ring space is not enough
> for the next async vhost packet enqueue, then just return to prevent
> enqueue failure.
> 
> Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
> ---
>  examples/vhost/ioat.c | 15 +++++++--------
>  1 file changed, 7 insertions(+), 8 deletions(-)
> 
> diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
> index 71d8a1f1f..b0b04aa45 100644
> --- a/examples/vhost/ioat.c
> +++ b/examples/vhost/ioat.c
> @@ -17,6 +17,7 @@ struct packet_tracker {
>  	unsigned short next_read;
>  	unsigned short next_write;
>  	unsigned short last_remain;
> +	unsigned short ioat_space;
>  };
> 
>  struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
> @@ -113,7 +114,7 @@ open_ioat(const char *value)
>  			goto out;
>  		}
>  		rte_rawdev_start(dev_id);
> -
> +		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
>  		dma_info->nr++;
>  		i++;
>  	}
> @@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
>  			src = descs[i_desc].src;
>  			dst = descs[i_desc].dst;
>  			i_seg = 0;
> +			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
> +				break;
>  			while (i_seg < src->nr_segs) {
> -				/*
> -				 * TODO: Assuming that the ring space of the
> -				 * IOAT device is large enough, so there is no
> -				 * error here, and the actual error handling
> -				 * will be added later.
> -				 */
>  				rte_ioat_enqueue_copy(dev_id,
>  					(uintptr_t)(src->iov[i_seg].iov_base)
>  						+ src->offset,
> @@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
>  				i_seg++;
>  			}
>  			write &= mask;
> -			cb_tracker[dev_id].size_track[write] = i_seg;
> +			cb_tracker[dev_id].size_track[write] = src->nr_segs;
> +			cb_tracker[dev_id].ioat_space -= src->nr_segs;
>  			write++;
>  		}
>  	} else {
> @@ -186,6 +184,7 @@ ioat_check_completed_copies_cb(int vid, uint16_t
> queue_id,
>  		int dev_id = dma_bind[vid].dmas[queue_id * 2
>  				+ VIRTIO_RXQ].dev_id;
>  		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
> +		cb_tracker[dev_id].ioat_space += n_seg;

rte_ioat_completed_ops() may fail. In this case, its return value is -1, which
will cause n_seg to become 65534.

Thanks,
Jiayu

>  		n_seg += cb_tracker[dev_id].last_remain;
>  		if (!n_seg)
>  			return 0;
> --
> 2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/2] examples/vhost: refactor vhost data path
  2020-12-25  8:07   ` [dpdk-dev] [PATCH v4 2/2] examples/vhost: refactor vhost data path Cheng Jiang
@ 2020-12-28  4:03     ` Hu, Jiayu
  2020-12-28  8:21       ` Jiang, Cheng1
  0 siblings, 1 reply; 44+ messages in thread
From: Hu, Jiayu @ 2020-12-28  4:03 UTC (permalink / raw)
  To: Jiang, Cheng1, maxime.coquelin, Xia, Chenbo; +Cc: dev, Yang, YvonneX

Hi Cheng,

Some comments are inline.

Thanks,
Jiayu
> -----Original Message-----
> From: Jiang, Cheng1 <cheng1.jiang@intel.com>
> Sent: Friday, December 25, 2020 4:07 PM
> To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> <yvonnex.yang@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>
> Subject: [PATCH v4 2/2] examples/vhost: refactor vhost data path
> 
> Change the vm2vm data path to batch enqueue for better performance.
> Support latest async vhost API, refactor vhost async data path,
> replase rte_atomicNN_xxx to atomic_XXX and clean some codes.

Typo: replase -> replace

> 
> Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
> ---
>  examples/vhost/main.c | 202 +++++++++++++++++++++++++++++++-----------
>  examples/vhost/main.h |   7 +-
>  2 files changed, 154 insertions(+), 55 deletions(-)
> 
> diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> index 8d8c3038b..3ea12a474 100644
> --- a/examples/vhost/main.c
> +++ b/examples/vhost/main.c
> @@ -179,9 +179,18 @@ struct mbuf_table {
>  	struct rte_mbuf *m_table[MAX_PKT_BURST];
>  };
> 
> +struct vhost_bufftable {
> +	uint32_t len;
> +	uint64_t pre_tsc;
> +	struct rte_mbuf *m_table[MAX_PKT_BURST];
> +};
> +
>  /* TX queue for each data core. */
>  struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
> 
> +/* TX queue for each vhost device. */

Every lcore maintains a TX buffer for every vhost device,
which is used to batch pkts for enqueue to get higher performance.
I suggest you update the description of vhost_txbuff above,
as it is not very clear.

> +struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE *
> MAX_VHOST_DEVICE];
> +
>  #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
>  				 / US_PER_S * BURST_TX_DRAIN_US)
>  #define VLAN_HLEN       4
> @@ -804,39 +813,114 @@ unlink_vmdq(struct vhost_dev *vdev)
>  	}
>  }
> 
> +static inline void
> +free_pkts(struct rte_mbuf **pkts, uint16_t n)
> +{
> +	while (n--)
> +		rte_pktmbuf_free(pkts[n]);
> +}
> +
>  static __rte_always_inline void
> -virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
> +complete_async_pkts(struct vhost_dev *vdev)
> +{
> +	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
> +	uint16_t complete_count;
> +
> +	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> +					VIRTIO_RXQ, p_cpl,
> MAX_PKT_BURST);
> +	if (complete_count) {
> +		atomic_fetch_sub(&vdev->nr_async_pkts, complete_count);
> +		free_pkts(p_cpl, complete_count);
> +	}
> +}
> +
> +static __rte_always_inline void
> +sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
>  	    struct rte_mbuf *m)
>  {
>  	uint16_t ret;
> -	struct rte_mbuf *m_cpl[1];
> 
>  	if (builtin_net_driver) {
>  		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
> -	} else if (async_vhost_driver) {
> -		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid,
> VIRTIO_RXQ,
> -						&m, 1);
> -
> -		if (likely(ret))
> -			dst_vdev->nr_async_pkts++;
> -
> -		while (likely(dst_vdev->nr_async_pkts)) {
> -			if (rte_vhost_poll_enqueue_completed(dst_vdev-
> >vid,
> -					VIRTIO_RXQ, m_cpl, 1))
> -				dst_vdev->nr_async_pkts--;
> -		}
>  	} else {
>  		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
> &m, 1);
>  	}
> 
>  	if (enable_stats) {
> -		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
> -		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
> +		atomic_fetch_add(&dst_vdev->stats.rx_total_atomic, 1);
> +		atomic_fetch_add(&dst_vdev->stats.rx_atomic, ret);
>  		src_vdev->stats.tx_total++;
>  		src_vdev->stats.tx += ret;
>  	}
>  }
> 
> +static __rte_always_inline void
> +drain_vhost(struct vhost_dev *vdev)
> +{
> +	uint16_t ret;
> +	uint64_t queue_id = rte_lcore_id() * MAX_VHOST_DEVICE + vdev-
> >vid;
> +	uint16_t nr_xmit = vhost_txbuff[queue_id]->len;
> +	struct rte_mbuf **m = vhost_txbuff[queue_id]->m_table;

"queue_id" is not a very good name, as it's not the queue id of vhost device,
but a buffer index which holds pkts to enqueue.

> +
> +	if (builtin_net_driver) {
> +		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
> +	} else if (async_vhost_driver) {
> +		uint32_t cpu_cpl_nr = 0;
> +		uint16_t enqueue_fail = 0;
> +		struct rte_mbuf *m_cpu_cpl[nr_xmit];
> +
> +		complete_async_pkts(vdev);
> +		ret = rte_vhost_submit_enqueue_burst(vdev->vid,
> VIRTIO_RXQ,
> +					m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
> +		atomic_fetch_add(&vdev->nr_async_pkts, ret - cpu_cpl_nr);
> +
> +		if (cpu_cpl_nr)
> +			free_pkts(m_cpu_cpl, cpu_cpl_nr);
> +
> +		enqueue_fail = nr_xmit - ret;
> +		if (enqueue_fail)
> +			free_pkts(&m[ret], nr_xmit - ret);
> +	} else {
> +		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
> +						m, nr_xmit);
> +	}
> +
> +	if (enable_stats) {
> +		atomic_fetch_add(&vdev->stats.rx_total_atomic, nr_xmit);
> +		atomic_fetch_add(&vdev->stats.rx_atomic, ret);
> +	}
> +
> +	if (!async_vhost_driver)
> +		free_pkts(m, nr_xmit);
> +}
> +
> +static __rte_always_inline void
> +drain_vhost_table(void)
> +{
> +	const uint16_t lcore_id = rte_lcore_id();
> +	struct vhost_bufftable *vhost_txq;
> +	struct vhost_dev *vdev;
> +	uint64_t cur_tsc;
> +
> +	TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list,
> lcore_vdev_entry) {

An lcore may have pkts to enqueue for any vhost device, since the
destination is decided by MAC address. So drain_vhost_table() shouldn't
process only the vhost ports allocated to its lcore, but all vhost ports
that have unsent packets.

> +		if (!vdev->remove) {
> +			vhost_txq = vhost_txbuff[lcore_id *
> MAX_VHOST_DEVICE
> +					+ vdev->vid];
> +			cur_tsc = rte_rdtsc();
> +
> +			if (unlikely(cur_tsc - vhost_txq->pre_tsc
> +					> MBUF_TABLE_DRAIN_TSC)) {
> +				RTE_LOG_DP(DEBUG, VHOST_DATA,
> +					"Vhost tX queue drained after

"tX" -> "TX"

> timeout with burst size %u\n",
> +					vhost_txq->len);
> +				drain_vhost(vdev);
> +				vhost_txq->len = 0;
> +				vhost_txq->pre_tsc = cur_tsc;
> +			}
> +		}
> +	}
> +}
> +
>  /*
>   * Check if the packet destination MAC address is for a local device. If so then
> put
>   * the packet on that devices RX queue. If not then return.
> @@ -846,7 +930,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct
> rte_mbuf *m)
>  {
>  	struct rte_ether_hdr *pkt_hdr;
>  	struct vhost_dev *dst_vdev;
> -
> +	struct vhost_bufftable *vhost_txq;
> +	const uint16_t lcore_id = rte_lcore_id();

Why use "const"?

>  	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> 
>  	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
> @@ -869,7 +954,19 @@ virtio_tx_local(struct vhost_dev *vdev, struct
> rte_mbuf *m)
>  		return 0;
>  	}
> 
> -	virtio_xmit(dst_vdev, vdev, m);
> +	vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE + dst_vdev-
> >vid];
> +	vhost_txq->m_table[vhost_txq->len++] = m;
> +
> +	if (enable_stats) {
> +		vdev->stats.tx_total++;
> +		vdev->stats.tx++;
> +	}
> +
> +	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
> +		drain_vhost(dst_vdev);
> +		vhost_txq->len = 0;
> +		vhost_txq->pre_tsc = rte_rdtsc();
> +	}
>  	return 0;
>  }
> 
> @@ -940,13 +1037,6 @@ static void virtio_tx_offload(struct rte_mbuf *m)
>  	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
>  }
> 
> -static inline void
> -free_pkts(struct rte_mbuf **pkts, uint16_t n)
> -{
> -	while (n--)
> -		rte_pktmbuf_free(pkts[n]);
> -}
> -
>  static __rte_always_inline void
>  do_drain_mbuf_table(struct mbuf_table *tx_q)
>  {
> @@ -979,16 +1069,14 @@ virtio_tx_route(struct vhost_dev *vdev, struct
> rte_mbuf *m, uint16_t vlan_tag)
> 
>  		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry)
> {
>  			if (vdev2 != vdev)
> -				virtio_xmit(vdev2, vdev, m);
> +				sync_virtio_xmit(vdev2, vdev, m);
>  		}
>  		goto queue2nic;
>  	}
> 
>  	/*check if destination is local VM*/
> -	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev,
> m) == 0)) {
> -		rte_pktmbuf_free(m);
> +	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev,
> m) == 0))
>  		return;
> -	}
> 
>  	if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
>  		if (unlikely(find_local_dest(vdev, m, &offset,
> @@ -1073,19 +1161,6 @@ drain_mbuf_table(struct mbuf_table *tx_q)
>  	}
>  }
> 
> -static __rte_always_inline void
> -complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
> -{
> -	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
> -	uint16_t complete_count;
> -
> -	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> -						qid, p_cpl, MAX_PKT_BURST);
> -	vdev->nr_async_pkts -= complete_count;
> -	if (complete_count)
> -		free_pkts(p_cpl, complete_count);
> -}
> -
>  static __rte_always_inline void
>  drain_eth_rx(struct vhost_dev *vdev)
>  {
> @@ -1095,9 +1170,6 @@ drain_eth_rx(struct vhost_dev *vdev)
>  	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
>  				    pkts, MAX_PKT_BURST);
> 
> -	while (likely(vdev->nr_async_pkts))
> -		complete_async_pkts(vdev, VIRTIO_RXQ);
> -
>  	if (!rx_count)
>  		return;
> 
> @@ -1123,17 +1195,31 @@ drain_eth_rx(struct vhost_dev *vdev)
>  		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
>  						pkts, rx_count);
>  	} else if (async_vhost_driver) {
> +		uint32_t cpu_cpl_nr = 0;
> +		uint16_t enqueue_fail = 0;
> +		struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
> +
> +		complete_async_pkts(vdev);
>  		enqueue_count = rte_vhost_submit_enqueue_burst(vdev-
> >vid,
> -					VIRTIO_RXQ, pkts, rx_count);
> -		vdev->nr_async_pkts += enqueue_count;
> +					VIRTIO_RXQ, pkts, rx_count,
> +					m_cpu_cpl, &cpu_cpl_nr);
> +		atomic_fetch_add(&vdev->nr_async_pkts,
> +					enqueue_count - cpu_cpl_nr);
> +		if (cpu_cpl_nr)
> +			free_pkts(m_cpu_cpl, cpu_cpl_nr);
> +
> +		enqueue_fail = rx_count - enqueue_count;
> +		if (enqueue_fail)
> +			free_pkts(&pkts[enqueue_count], enqueue_fail);
> +
>  	} else {
>  		enqueue_count = rte_vhost_enqueue_burst(vdev->vid,
> VIRTIO_RXQ,
>  						pkts, rx_count);
>  	}
> 
>  	if (enable_stats) {
> -		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
> -		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
> +		atomic_fetch_add(&vdev->stats.rx_total_atomic, rx_count);
> +		atomic_fetch_add(&vdev->stats.rx_atomic, enqueue_count);
>  	}
> 
>  	if (!async_vhost_driver)
> @@ -1202,7 +1288,7 @@ switch_worker(void *arg __rte_unused)
> 
>  	while(1) {
>  		drain_mbuf_table(tx_q);
> -
> +		drain_vhost_table();
>  		/*
>  		 * Inform the configuration core that we have exited the
>  		 * linked list and that no devices are in use if requested.
> @@ -1298,6 +1384,7 @@ static int
>  new_device(int vid)
>  {
>  	int lcore, core_add = 0;
> +	uint16_t i;
>  	uint32_t device_num_min = num_devices;
>  	struct vhost_dev *vdev;
>  	vdev = rte_zmalloc("vhost device", sizeof(*vdev),
> RTE_CACHE_LINE_SIZE);
> @@ -1309,6 +1396,13 @@ new_device(int vid)
>  	}
>  	vdev->vid = vid;
> 
> +	for (i = 0; i < RTE_MAX_LCORE; i++) {
> +		vhost_txbuff[i * MAX_VHOST_DEVICE + vid]
> +			= rte_zmalloc("vhost bufftable",
> +				sizeof(struct vhost_bufftable),
> +				RTE_CACHE_LINE_SIZE);

rte_zmalloc() may fail; the failure needs to be handled.

> +	}
> +
>  	if (builtin_net_driver)
>  		vs_vhost_net_setup(vdev);
> 
> @@ -1343,12 +1437,15 @@ new_device(int vid)
>  	if (async_vhost_driver) {
>  		struct rte_vhost_async_features f;
>  		struct rte_vhost_async_channel_ops channel_ops;
> +
>  		if (strncmp(dma_type, "ioat", 4) == 0) {
>  			channel_ops.transfer_data = ioat_transfer_data_cb;
>  			channel_ops.check_completed_copies =
>  				ioat_check_completed_copies_cb;
> +
>  			f.async_inorder = 1;
>  			f.async_threshold = 256;
> +
>  			return rte_vhost_async_channel_register(vid,
> VIRTIO_RXQ,
>  				f.intval, &channel_ops);
>  		}
> @@ -1392,8 +1489,8 @@ print_stats(__rte_unused void *arg)
>  			tx         = vdev->stats.tx;
>  			tx_dropped = tx_total - tx;
> 
> -			rx_total   = rte_atomic64_read(&vdev-
> >stats.rx_total_atomic);
> -			rx         = rte_atomic64_read(&vdev->stats.rx_atomic);
> +			rx_total   = atomic_load(&vdev-
> >stats.rx_total_atomic);
> +			rx         = atomic_load(&vdev->stats.rx_atomic);
>  			rx_dropped = rx_total - rx;
> 
>  			printf("Statistics for device %d\n"
> @@ -1592,6 +1689,7 @@ main(int argc, char *argv[])
>  	/* Register vhost user driver to handle vhost messages. */
>  	for (i = 0; i < nb_sockets; i++) {
>  		char *file = socket_files + i * PATH_MAX;
> +
>  		if (async_vhost_driver)
>  			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
> 
> diff --git a/examples/vhost/main.h b/examples/vhost/main.h
> index 4317b6ae8..6aa798a3e 100644
> --- a/examples/vhost/main.h
> +++ b/examples/vhost/main.h
> @@ -8,6 +8,7 @@
>  #include <sys/queue.h>
> 
>  #include <rte_ether.h>
> +#include <stdatomic.h>
> 
>  /* Macros for printing using RTE_LOG */
>  #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
> @@ -21,8 +22,8 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
>  struct device_statistics {
>  	uint64_t	tx;
>  	uint64_t	tx_total;
> -	rte_atomic64_t	rx_atomic;
> -	rte_atomic64_t	rx_total_atomic;
> +	atomic_int_least64_t	rx_atomic;
> +	atomic_int_least64_t	rx_total_atomic;
>  };
> 
>  struct vhost_queue {
> @@ -51,7 +52,7 @@ struct vhost_dev {
>  	uint64_t features;
>  	size_t hdr_len;
>  	uint16_t nr_vrings;
> -	uint16_t nr_async_pkts;
> +	atomic_int_least16_t nr_async_pkts;
>  	struct rte_vhost_memory *mem;
>  	struct device_statistics stats;
>  	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
> --
> 2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 0/2] examples/vhost: sample code refactor
  2020-12-18 11:33 [dpdk-dev] [PATCH v1 0/3] examples/vhost: sample code refactor Cheng Jiang
                   ` (5 preceding siblings ...)
  2020-12-25  8:07 ` [dpdk-dev] [PATCH v4 0/2] examples/vhost: sample code refactor Cheng Jiang
@ 2020-12-28  7:16 ` Cheng Jiang
  2020-12-28  7:16   ` [dpdk-dev] [PATCH v5 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
  2020-12-28  7:16   ` [dpdk-dev] [PATCH v5 2/2] examples/vhost: refactor vhost data path Cheng Jiang
  2021-01-04  4:57 ` [dpdk-dev] [PATCH v6 0/2] examples/vhost: sample code refactor Cheng Jiang
                   ` (3 subsequent siblings)
  10 siblings, 2 replies; 44+ messages in thread
From: Cheng Jiang @ 2020-12-28  7:16 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Refactor the vhost sample code. Add an ioat ring space count and check
in the ioat callback, optimize the vhost data path for batch enqueue, replace
rte_atomicNN_xxx with atomic_XXX, and refactor the vhost async data path.
---
v5:
 * added freeing of the vhost enqueue buffers when destroying a vhost device
 * added rte_ioat_completed_ops() failure handler
 * changed the behavior of drain_vhost_table() function
 * changed some variable names
 * changed some variable definitions
 * added rte_zmalloc() failure handler
 * added some comments
 * fixed some typos

v4:
 * improved code structure
 * improved vhost enqueue buffer memory allocation
 * cleaned some codes

v3:
 * added some variable initialization
 * cleaned up some code

v2:
 * optimized patch structure
 * optimized git log
 * replaced rte_atomicNN_xxx to atomic_XXX

Cheng Jiang (2):
  examples/vhost: add ioat ring space count and check
  examples/vhost: refactor vhost data path

 examples/vhost/ioat.c |  22 +++--
 examples/vhost/main.c | 214 ++++++++++++++++++++++++++++++++----------
 examples/vhost/main.h |   7 +-
 3 files changed, 178 insertions(+), 65 deletions(-)

--
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 1/2] examples/vhost: add ioat ring space count and check
  2020-12-28  7:16 ` [dpdk-dev] [PATCH v5 0/2] examples/vhost: sample code refactor Cheng Jiang
@ 2020-12-28  7:16   ` Cheng Jiang
  2020-12-28  7:16   ` [dpdk-dev] [PATCH v5 2/2] examples/vhost: refactor vhost data path Cheng Jiang
  1 sibling, 0 replies; 44+ messages in thread
From: Cheng Jiang @ 2020-12-28  7:16 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Add an ioat ring space count and check: if the ioat ring space is not enough
for the next async vhost packet enqueue, just return to prevent an
enqueue failure. Add an rte_ioat_completed_ops() failure handler.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/ioat.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 71d8a1f1f..679d1e2f5 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -17,6 +17,7 @@ struct packet_tracker {
 	unsigned short next_read;
 	unsigned short next_write;
 	unsigned short last_remain;
+	unsigned short ioat_space;
 };
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
@@ -113,7 +114,7 @@ open_ioat(const char *value)
 			goto out;
 		}
 		rte_rawdev_start(dev_id);
-
+		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
 		dma_info->nr++;
 		i++;
 	}
@@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 			src = descs[i_desc].src;
 			dst = descs[i_desc].dst;
 			i_seg = 0;
+			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
+				break;
 			while (i_seg < src->nr_segs) {
-				/*
-				 * TODO: Assuming that the ring space of the
-				 * IOAT device is large enough, so there is no
-				 * error here, and the actual error handling
-				 * will be added later.
-				 */
 				rte_ioat_enqueue_copy(dev_id,
 					(uintptr_t)(src->iov[i_seg].iov_base)
 						+ src->offset,
@@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 				i_seg++;
 			}
 			write &= mask;
-			cb_tracker[dev_id].size_track[write] = i_seg;
+			cb_tracker[dev_id].size_track[write] = src->nr_segs;
+			cb_tracker[dev_id].ioat_space -= src->nr_segs;
 			write++;
 		}
 	} else {
@@ -178,17 +176,21 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 {
 	if (!opaque_data) {
 		uintptr_t dump[255];
-		unsigned short n_seg;
+		int n_seg;
 		unsigned short read, write;
 		unsigned short nb_packet = 0;
 		unsigned short mask = MAX_ENQUEUED_SIZE - 1;
 		unsigned short i;
+
 		int dev_id = dma_bind[vid].dmas[queue_id * 2
 				+ VIRTIO_RXQ].dev_id;
 		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
-		n_seg += cb_tracker[dev_id].last_remain;
 		if (!n_seg)
 			return 0;
+
+		cb_tracker[dev_id].ioat_space += n_seg;
+		n_seg += cb_tracker[dev_id].last_remain;
+
 		read = cb_tracker[dev_id].next_read;
 		write = cb_tracker[dev_id].next_write;
 		for (i = 0; i < max_packets; i++) {
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 2/2] examples/vhost: refactor vhost data path
  2020-12-28  7:16 ` [dpdk-dev] [PATCH v5 0/2] examples/vhost: sample code refactor Cheng Jiang
  2020-12-28  7:16   ` [dpdk-dev] [PATCH v5 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
@ 2020-12-28  7:16   ` Cheng Jiang
  1 sibling, 0 replies; 44+ messages in thread
From: Cheng Jiang @ 2020-12-28  7:16 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, Cheng Jiang

Change the vm2vm data path to batch enqueue for better performance.
Support the latest async vhost API, refactor the vhost async data path,
replace rte_atomicNN_xxx with atomic_XXX, and clean up some code.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/main.c | 214 ++++++++++++++++++++++++++++++++----------
 examples/vhost/main.h |   7 +-
 2 files changed, 166 insertions(+), 55 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 8d8c3038b..45976c93c 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -179,9 +179,22 @@ struct mbuf_table {
 	struct rte_mbuf *m_table[MAX_PKT_BURST];
 };
 
+struct vhost_bufftable {
+	uint32_t len;
+	uint64_t pre_tsc;
+	struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
 /* TX queue for each data core. */
 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
 
+/*
+ * Vhost TX buffer for each data core.
+ * Every data core maintains a TX buffer for every vhost device,
+ * which is used for batch pkts enqueue for higher performance.
+ */
+struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * MAX_VHOST_DEVICE];
+
 #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
 				 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN       4
@@ -804,39 +817,112 @@ unlink_vmdq(struct vhost_dev *vdev)
 	}
 }
 
+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+	while (n--)
+		rte_pktmbuf_free(pkts[n]);
+}
+
 static __rte_always_inline void
-virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
+complete_async_pkts(struct vhost_dev *vdev)
+{
+	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+	uint16_t complete_count;
+
+	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
+	if (complete_count) {
+		atomic_fetch_sub(&vdev->nr_async_pkts, complete_count);
+		free_pkts(p_cpl, complete_count);
+	}
+}
+
+static __rte_always_inline void
+sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
 {
 	uint16_t ret;
-	struct rte_mbuf *m_cpl[1];
 
 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
-	} else if (async_vhost_driver) {
-		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
-						&m, 1);
-
-		if (likely(ret))
-			dst_vdev->nr_async_pkts++;
-
-		while (likely(dst_vdev->nr_async_pkts)) {
-			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
-					VIRTIO_RXQ, m_cpl, 1))
-				dst_vdev->nr_async_pkts--;
-		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
 	}
 
 	if (enable_stats) {
-		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
-		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
+		atomic_fetch_add(&dst_vdev->stats.rx_total_atomic, 1);
+		atomic_fetch_add(&dst_vdev->stats.rx_atomic, ret);
 		src_vdev->stats.tx_total++;
 		src_vdev->stats.tx += ret;
 	}
 }
 
+static __rte_always_inline void
+drain_vhost(struct vhost_dev *vdev)
+{
+	uint16_t ret;
+	uint64_t buff_idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
+	uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
+	struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
+
+	if (builtin_net_driver) {
+		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
+	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[nr_xmit];
+
+		complete_async_pkts(vdev);
+		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+					m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts, ret - cpu_cpl_nr);
+
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+
+		enqueue_fail = nr_xmit - ret;
+		if (enqueue_fail)
+			free_pkts(&m[ret], nr_xmit - ret);
+	} else {
+		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+						m, nr_xmit);
+	}
+
+	if (enable_stats) {
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, nr_xmit);
+		atomic_fetch_add(&vdev->stats.rx_atomic, ret);
+	}
+
+	if (!async_vhost_driver)
+		free_pkts(m, nr_xmit);
+}
+
+static __rte_always_inline void
+drain_vhost_table(void)
+{
+	uint16_t lcore_id = rte_lcore_id();
+	struct vhost_bufftable *vhost_txq;
+	struct vhost_dev *vdev;
+	uint64_t cur_tsc;
+
+	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+		vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE
+						+ vdev->vid];
+
+		cur_tsc = rte_rdtsc();
+		if (unlikely(cur_tsc - vhost_txq->pre_tsc
+				> MBUF_TABLE_DRAIN_TSC)) {
+			RTE_LOG_DP(DEBUG, VHOST_DATA,
+				"Vhost TX queue drained after timeout with burst size %u\n",
+				vhost_txq->len);
+			drain_vhost(vdev);
+			vhost_txq->len = 0;
+			vhost_txq->pre_tsc = cur_tsc;
+		}
+	}
+}
+
 /*
  * Check if the packet destination MAC address is for a local device. If so then put
  * the packet on that devices RX queue. If not then return.
@@ -846,7 +932,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
 	struct rte_ether_hdr *pkt_hdr;
 	struct vhost_dev *dst_vdev;
-
+	struct vhost_bufftable *vhost_txq;
+	uint16_t lcore_id = rte_lcore_id();
 	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
 	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
@@ -869,7 +956,19 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 		return 0;
 	}
 
-	virtio_xmit(dst_vdev, vdev, m);
+	vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE + dst_vdev->vid];
+	vhost_txq->m_table[vhost_txq->len++] = m;
+
+	if (enable_stats) {
+		vdev->stats.tx_total++;
+		vdev->stats.tx++;
+	}
+
+	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
+		drain_vhost(dst_vdev);
+		vhost_txq->len = 0;
+		vhost_txq->pre_tsc = rte_rdtsc();
+	}
 	return 0;
 }
 
@@ -940,13 +1039,6 @@ static void virtio_tx_offload(struct rte_mbuf *m)
 	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
 }
 
-static inline void
-free_pkts(struct rte_mbuf **pkts, uint16_t n)
-{
-	while (n--)
-		rte_pktmbuf_free(pkts[n]);
-}
-
 static __rte_always_inline void
 do_drain_mbuf_table(struct mbuf_table *tx_q)
 {
@@ -979,16 +1071,14 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 
 		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
 			if (vdev2 != vdev)
-				virtio_xmit(vdev2, vdev, m);
+				sync_virtio_xmit(vdev2, vdev, m);
 		}
 		goto queue2nic;
 	}
 
 	/*check if destination is local VM*/
-	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
-		rte_pktmbuf_free(m);
+	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0))
 		return;
-	}
 
 	if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
 		if (unlikely(find_local_dest(vdev, m, &offset,
@@ -1073,19 +1163,6 @@ drain_mbuf_table(struct mbuf_table *tx_q)
 	}
 }
 
-static __rte_always_inline void
-complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
-{
-	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
-	uint16_t complete_count;
-
-	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
-						qid, p_cpl, MAX_PKT_BURST);
-	vdev->nr_async_pkts -= complete_count;
-	if (complete_count)
-		free_pkts(p_cpl, complete_count);
-}
-
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
@@ -1095,9 +1172,6 @@ drain_eth_rx(struct vhost_dev *vdev)
 	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
 				    pkts, MAX_PKT_BURST);
 
-	while (likely(vdev->nr_async_pkts))
-		complete_async_pkts(vdev, VIRTIO_RXQ);
-
 	if (!rx_count)
 		return;
 
@@ -1123,17 +1197,31 @@ drain_eth_rx(struct vhost_dev *vdev)
 		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
 						pkts, rx_count);
 	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
+
+		complete_async_pkts(vdev);
 		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
-					VIRTIO_RXQ, pkts, rx_count);
-		vdev->nr_async_pkts += enqueue_count;
+					VIRTIO_RXQ, pkts, rx_count,
+					m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts,
+					enqueue_count - cpu_cpl_nr);
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+
+		enqueue_fail = rx_count - enqueue_count;
+		if (enqueue_fail)
+			free_pkts(&pkts[enqueue_count], enqueue_fail);
+
 	} else {
 		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
 						pkts, rx_count);
 	}
 
 	if (enable_stats) {
-		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
-		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, rx_count);
+		atomic_fetch_add(&vdev->stats.rx_atomic, enqueue_count);
 	}
 
 	if (!async_vhost_driver)
@@ -1202,7 +1290,7 @@ switch_worker(void *arg __rte_unused)
 
 	while(1) {
 		drain_mbuf_table(tx_q);
-
+		drain_vhost_table();
 		/*
 		 * Inform the configuration core that we have exited the
 		 * linked list and that no devices are in use if requested.
@@ -1243,6 +1331,7 @@ destroy_device(int vid)
 {
 	struct vhost_dev *vdev = NULL;
 	int lcore;
+	uint16_t i;
 
 	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
 		if (vdev->vid == vid)
@@ -1256,6 +1345,9 @@ destroy_device(int vid)
 		rte_pause();
 	}
 
+	for (i = 0; i < RTE_MAX_LCORE; i++)
+		rte_free(vhost_txbuff[i * MAX_VHOST_DEVICE + vid]);
+
 	if (builtin_net_driver)
 		vs_vhost_net_remove(vdev);
 
@@ -1298,6 +1390,7 @@ static int
 new_device(int vid)
 {
 	int lcore, core_add = 0;
+	uint16_t i;
 	uint32_t device_num_min = num_devices;
 	struct vhost_dev *vdev;
 	vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
@@ -1309,6 +1402,19 @@ new_device(int vid)
 	}
 	vdev->vid = vid;
 
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		vhost_txbuff[i * MAX_VHOST_DEVICE + vid]
+			= rte_zmalloc("vhost bufftable",
+				sizeof(struct vhost_bufftable),
+				RTE_CACHE_LINE_SIZE);
+
+		if (vhost_txbuff[i * MAX_VHOST_DEVICE + vid] == NULL) {
+			RTE_LOG(INFO, VHOST_DATA,
+			  "(%d) couldn't allocate memory for vhost TX\n", vid);
+			return -1;
+		}
+	}
+
 	if (builtin_net_driver)
 		vs_vhost_net_setup(vdev);
 
@@ -1343,12 +1449,15 @@ new_device(int vid)
 	if (async_vhost_driver) {
 		struct rte_vhost_async_features f;
 		struct rte_vhost_async_channel_ops channel_ops;
+
 		if (strncmp(dma_type, "ioat", 4) == 0) {
 			channel_ops.transfer_data = ioat_transfer_data_cb;
 			channel_ops.check_completed_copies =
 				ioat_check_completed_copies_cb;
+
 			f.async_inorder = 1;
 			f.async_threshold = 256;
+
 			return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
 				f.intval, &channel_ops);
 		}
@@ -1392,8 +1501,8 @@ print_stats(__rte_unused void *arg)
 			tx         = vdev->stats.tx;
 			tx_dropped = tx_total - tx;
 
-			rx_total   = rte_atomic64_read(&vdev->stats.rx_total_atomic);
-			rx         = rte_atomic64_read(&vdev->stats.rx_atomic);
+			rx_total   = atomic_load(&vdev->stats.rx_total_atomic);
+			rx         = atomic_load(&vdev->stats.rx_atomic);
 			rx_dropped = rx_total - rx;
 
 			printf("Statistics for device %d\n"
@@ -1592,6 +1701,7 @@ main(int argc, char *argv[])
 	/* Register vhost user driver to handle vhost messages. */
 	for (i = 0; i < nb_sockets; i++) {
 		char *file = socket_files + i * PATH_MAX;
+
 		if (async_vhost_driver)
 			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
 
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 4317b6ae8..6aa798a3e 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -8,6 +8,7 @@
 #include <sys/queue.h>
 
 #include <rte_ether.h>
+#include <stdatomic.h>
 
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
@@ -21,8 +22,8 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 struct device_statistics {
 	uint64_t	tx;
 	uint64_t	tx_total;
-	rte_atomic64_t	rx_atomic;
-	rte_atomic64_t	rx_total_atomic;
+	atomic_int_least64_t	rx_atomic;
+	atomic_int_least64_t	rx_total_atomic;
 };
 
 struct vhost_queue {
@@ -51,7 +52,7 @@ struct vhost_dev {
 	uint64_t features;
 	size_t hdr_len;
 	uint16_t nr_vrings;
-	uint16_t nr_async_pkts;
+	atomic_int_least16_t nr_async_pkts;
 	struct rte_vhost_memory *mem;
 	struct device_statistics stats;
 	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/2] examples/vhost: add ioat ring space count and check
  2020-12-28  2:50     ` Hu, Jiayu
@ 2020-12-28  8:08       ` Jiang, Cheng1
  0 siblings, 0 replies; 44+ messages in thread
From: Jiang, Cheng1 @ 2020-12-28  8:08 UTC (permalink / raw)
  To: Hu, Jiayu, maxime.coquelin, Xia, Chenbo; +Cc: dev, Yang, YvonneX

Hi Jiayu,

> -----Original Message-----
> From: Hu, Jiayu <jiayu.hu@intel.com>
> Sent: Monday, December 28, 2020 10:51 AM
> To: Jiang, Cheng1 <cheng1.jiang@intel.com>; maxime.coquelin@redhat.com;
> Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Yang, YvonneX <yvonnex.yang@intel.com>
> Subject: RE: [PATCH v4 1/2] examples/vhost: add ioat ring space count and
> check
> 
> Hi Cheng,
> 
> > -----Original Message-----
> > From: Jiang, Cheng1 <cheng1.jiang@intel.com>
> > Sent: Friday, December 25, 2020 4:07 PM
> > To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> > Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> > <yvonnex.yang@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>
> > Subject: [PATCH v4 1/2] examples/vhost: add ioat ring space count and
> > check
> >
> > Add ioat ring space count and check, if ioat ring space is not enough
> > for the next async vhost packet enqueue, then just return to prevent
> > enqueue failure.
> >
> > Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
> > ---
> >  examples/vhost/ioat.c | 15 +++++++--------
> >  1 file changed, 7 insertions(+), 8 deletions(-)
> >
> > diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c index
> > 71d8a1f1f..b0b04aa45 100644
> > --- a/examples/vhost/ioat.c
> > +++ b/examples/vhost/ioat.c
> > @@ -17,6 +17,7 @@ struct packet_tracker {  unsigned short next_read;
> > unsigned short next_write;  unsigned short last_remain;
> > +unsigned short ioat_space;
> >  };
> >
> >  struct packet_tracker cb_tracker[MAX_VHOST_DEVICE]; @@ -113,7 +114,7
> > @@ open_ioat(const char *value)  goto out;  }
> > rte_rawdev_start(dev_id);
> > -
> > +cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
> >  dma_info->nr++;
> >  i++;
> >  }
> > @@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
> > src = descs[i_desc].src;  dst = descs[i_desc].dst;  i_seg = 0;
> > +if (cb_tracker[dev_id].ioat_space < src->nr_segs) break;
> >  while (i_seg < src->nr_segs) {
> > -/*
> > - * TODO: Assuming that the ring space of the
> > - * IOAT device is large enough, so there is no
> > - * error here, and the actual error handling
> > - * will be added later.
> > - */
> >  rte_ioat_enqueue_copy(dev_id,
> >  (uintptr_t)(src->iov[i_seg].iov_base)
> >  + src->offset,
> > @@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
> > i_seg++;  }  write &= mask; -cb_tracker[dev_id].size_track[write] =
> > i_seg;
> > +cb_tracker[dev_id].size_track[write] = src->nr_segs;
> > +cb_tracker[dev_id].ioat_space -= src->nr_segs;
> >  write++;
> >  }
> >  } else {
> > @@ -186,6 +184,7 @@ ioat_check_completed_copies_cb(int vid, uint16_t
> > queue_id,  int dev_id = dma_bind[vid].dmas[queue_id * 2  +
> > VIRTIO_RXQ].dev_id;  n_seg = rte_ioat_completed_ops(dev_id, 255, dump,
> > dump);
> > +cb_tracker[dev_id].ioat_space += n_seg;
> 
> rte_ioat_completed_ops() may fail. In this case, its return value is -1, which
> will cause n_seg (an unsigned short) to wrap around to 65535.

I will change it in the next version.

Thanks.
Cheng

> 
> Thanks,
> Jiayu
> 
> >  n_seg += cb_tracker[dev_id].last_remain;  if (!n_seg)  return 0;
> > --
> > 2.29.2
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/2] examples/vhost: refactor vhost data path
  2020-12-28  4:03     ` Hu, Jiayu
@ 2020-12-28  8:21       ` Jiang, Cheng1
  0 siblings, 0 replies; 44+ messages in thread
From: Jiang, Cheng1 @ 2020-12-28  8:21 UTC (permalink / raw)
  To: Hu, Jiayu, maxime.coquelin, Xia, Chenbo; +Cc: dev, Yang, YvonneX

Hi Jiayu,


> -----Original Message-----
> From: Hu, Jiayu <jiayu.hu@intel.com>
> Sent: Monday, December 28, 2020 12:04 PM
> To: Jiang, Cheng1 <cheng1.jiang@intel.com>; maxime.coquelin@redhat.com;
> Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Yang, YvonneX <yvonnex.yang@intel.com>
> Subject: RE: [PATCH v4 2/2] examples/vhost: refactor vhost data path
> 
> Hi Cheng,
> 
> Some comments are inline.
> 
> Thanks,
> Jiayu
> > -----Original Message-----
> > From: Jiang, Cheng1 <cheng1.jiang@intel.com>
> > Sent: Friday, December 25, 2020 4:07 PM
> > To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> > Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> > <yvonnex.yang@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>
> > Subject: [PATCH v4 2/2] examples/vhost: refactor vhost data path
> >
> > Change the vm2vm data path to batch enqueue for better performance.
> > Support latest async vhost API, refactor vhost async data path,
> > replase rte_atomicNN_xxx to atomic_XXX and clean some codes.
> 
> Typo: replase -> replace

I'll fix it in the next version.

> 
> >
> > Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
> > ---
> >  examples/vhost/main.c | 202 +++++++++++++++++++++++++++++++-----
> ------
> >  examples/vhost/main.h |   7 +-
> >  2 files changed, 154 insertions(+), 55 deletions(-)
> >
> > diff --git a/examples/vhost/main.c b/examples/vhost/main.c index
> > 8d8c3038b..3ea12a474 100644
> > --- a/examples/vhost/main.c
> > +++ b/examples/vhost/main.c
> > @@ -179,9 +179,18 @@ struct mbuf_table {  struct rte_mbuf
> > *m_table[MAX_PKT_BURST];  };
> >
> > +struct vhost_bufftable {
> > +uint32_t len;
> > +uint64_t pre_tsc;
> > +struct rte_mbuf *m_table[MAX_PKT_BURST]; };
> > +
> >  /* TX queue for each data core. */
> >  struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
> >
> > +/* TX queue for each vhost device. */
> 
> Every lcore maintains a TX buffer for every vhost device, which is to batch
> pkts to enqueue for higher performance.
> I suggest you to update the description of vhost_txbuff above, as it is not
> very clear.

Sure, will add some comments in the next version.

> 
> > +struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE *
> > MAX_VHOST_DEVICE];
> > +
> >  #define MBUF_TABLE_DRAIN_TSC((rte_get_tsc_hz() + US_PER_S - 1) \
> >   / US_PER_S * BURST_TX_DRAIN_US)
> >  #define VLAN_HLEN       4
> > @@ -804,39 +813,114 @@ unlink_vmdq(struct vhost_dev *vdev)  }  }
> >
> > +static inline void
> > +free_pkts(struct rte_mbuf **pkts, uint16_t n) { while (n--)
> > +rte_pktmbuf_free(pkts[n]); }
> > +
> >  static __rte_always_inline void
> > -virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
> > +complete_async_pkts(struct vhost_dev *vdev) { struct rte_mbuf
> > +*p_cpl[MAX_PKT_BURST]; uint16_t complete_count;
> > +
> > +complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> > +VIRTIO_RXQ, p_cpl,
> > MAX_PKT_BURST);
> > +if (complete_count) {
> > +atomic_fetch_sub(&vdev->nr_async_pkts, complete_count);
> > +free_pkts(p_cpl, complete_count); } }
> > +
> > +static __rte_always_inline void
> > +sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev
> > +*src_vdev,
> >      struct rte_mbuf *m)
> >  {
> >  uint16_t ret;
> > -struct rte_mbuf *m_cpl[1];
> >
> >  if (builtin_net_driver) {
> >  ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1); -} else if
> > (async_vhost_driver) { -ret =
> > rte_vhost_submit_enqueue_burst(dst_vdev->vid,
> > VIRTIO_RXQ,
> > -&m, 1);
> > -
> > -if (likely(ret))
> > -dst_vdev->nr_async_pkts++;
> > -
> > -while (likely(dst_vdev->nr_async_pkts)) { -if
> > (rte_vhost_poll_enqueue_completed(dst_vdev-
> > >vid,
> > -VIRTIO_RXQ, m_cpl, 1))
> > -dst_vdev->nr_async_pkts--;
> > -}
> >  } else {
> >  ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);  }
> >
> >  if (enable_stats) {
> > -rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
> > -rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
> > +atomic_fetch_add(&dst_vdev->stats.rx_total_atomic, 1);
> > +atomic_fetch_add(&dst_vdev->stats.rx_atomic, ret);
> >  src_vdev->stats.tx_total++;
> >  src_vdev->stats.tx += ret;
> >  }
> >  }
> >
> > +static __rte_always_inline void
> > +drain_vhost(struct vhost_dev *vdev)
> > +{
> > +uint16_t ret;
> > +uint64_t queue_id = rte_lcore_id() * MAX_VHOST_DEVICE + vdev-
> > >vid;
> > +uint16_t nr_xmit = vhost_txbuff[queue_id]->len; struct rte_mbuf **m =
> > +vhost_txbuff[queue_id]->m_table;
> 
> "queue_id" is not a very good name, as it's not the queue id of vhost device,
> but a buffer index which holds pkts to enqueue.

Sure, I will change it to buff_idx.

> 
> > +
> > +if (builtin_net_driver) {
> > +ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit); } else if
> > +(async_vhost_driver) { uint32_t cpu_cpl_nr = 0; uint16_t enqueue_fail
> > += 0; struct rte_mbuf *m_cpu_cpl[nr_xmit];
> > +
> > +complete_async_pkts(vdev);
> > +ret = rte_vhost_submit_enqueue_burst(vdev->vid,
> > VIRTIO_RXQ,
> > +m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
> > +atomic_fetch_add(&vdev->nr_async_pkts, ret - cpu_cpl_nr);
> > +
> > +if (cpu_cpl_nr)
> > +free_pkts(m_cpu_cpl, cpu_cpl_nr);
> > +
> > +enqueue_fail = nr_xmit - ret;
> > +if (enqueue_fail)
> > +free_pkts(&m[ret], nr_xmit - ret);
> > +} else {
> > +ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ, m, nr_xmit); }
> > +
> > +if (enable_stats) {
> > +atomic_fetch_add(&vdev->stats.rx_total_atomic, nr_xmit);
> > +atomic_fetch_add(&vdev->stats.rx_atomic, ret); }
> > +
> > +if (!async_vhost_driver)
> > +free_pkts(m, nr_xmit);
> > +}
> > +
> > +static __rte_always_inline void
> > +drain_vhost_table(void)
> > +{
> > +const uint16_t lcore_id = rte_lcore_id(); struct vhost_bufftable
> > +*vhost_txq; struct vhost_dev *vdev; uint64_t cur_tsc;
> > +
> > +TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list,
> > lcore_vdev_entry) {
> 
> A lcore may have pkts to enqueue for any vhost device, as it's decided by
> mac address. So drain_vhost_table() shouldn't just process vhost ports
> allocated to its lcore, but all vhost ports that have un-sent packets.

Sure, I will fix it in the next version.

> 
> > +if (!vdev->remove) {
> > +vhost_txq = vhost_txbuff[lcore_id *
> > MAX_VHOST_DEVICE
> > ++ vdev->vid];
> > +cur_tsc = rte_rdtsc();
> > +
> > +if (unlikely(cur_tsc - vhost_txq->pre_tsc
> > +> MBUF_TABLE_DRAIN_TSC)) {
> > +RTE_LOG_DP(DEBUG, VHOST_DATA,
> > +"Vhost tX queue drained after
> 
> "tX" -> "TX"

I will fix it in the next version.

> 
> > timeout with burst size %u\n",
> > +vhost_txq->len);
> > +drain_vhost(vdev);
> > +vhost_txq->len = 0;
> > +vhost_txq->pre_tsc = cur_tsc;
> > +}
> > +}
> > +}
> > +}
> > +
> >  /*
> >   * Check if the packet destination MAC address is for a local device.
> > If so then put
> >   * the packet on that devices RX queue. If not then return.
> > @@ -846,7 +930,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct
> > rte_mbuf *m)  {  struct rte_ether_hdr *pkt_hdr;  struct vhost_dev
> > *dst_vdev;
> > -
> > +struct vhost_bufftable *vhost_txq;
> > +const uint16_t lcore_id = rte_lcore_id();
> 
> Why use "const"?

No particular reason; I probably just forgot to delete it. I will fix it in the next version.

> 
> >  pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> >
> >  dst_vdev = find_vhost_dev(&pkt_hdr->d_addr); @@ -869,7 +954,19 @@
> > virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)  return 0;
> > }
> >
> > -virtio_xmit(dst_vdev, vdev, m);
> > +vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE + dst_vdev-
> > >vid];
> > +vhost_txq->m_table[vhost_txq->len++] = m;
> > +
> > +if (enable_stats) {
> > +vdev->stats.tx_total++;
> > +vdev->stats.tx++;
> > +}
> > +
> > +if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
> > +drain_vhost(dst_vdev); vhost_txq->len = 0; vhost_txq->pre_tsc =
> > +rte_rdtsc(); }
> >  return 0;
> >  }
> >
> > @@ -940,13 +1037,6 @@ static void virtio_tx_offload(struct rte_mbuf
> > *m)  tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);  }
> >
> > -static inline void
> > -free_pkts(struct rte_mbuf **pkts, uint16_t n) -{ -while (n--)
> > -rte_pktmbuf_free(pkts[n]); -}
> > -
> >  static __rte_always_inline void
> >  do_drain_mbuf_table(struct mbuf_table *tx_q)  { @@ -979,16 +1069,14
> > @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m,
> > uint16_t vlan_tag)
> >
> >  TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {  if (vdev2
> > != vdev) -virtio_xmit(vdev2, vdev, m);
> > +sync_virtio_xmit(vdev2, vdev, m);
> >  }
> >  goto queue2nic;
> >  }
> >
> >  /*check if destination is local VM*/
> > -if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev,
> > m) == 0)) {
> > -rte_pktmbuf_free(m);
> > +if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev,
> > m) == 0))
> >  return;
> > -}
> >
> >  if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {  if
> > (unlikely(find_local_dest(vdev, m, &offset, @@ -1073,19 +1161,6 @@
> > drain_mbuf_table(struct mbuf_table *tx_q)  }  }
> >
> > -static __rte_always_inline void
> > -complete_async_pkts(struct vhost_dev *vdev, uint16_t qid) -{ -struct
> > rte_mbuf *p_cpl[MAX_PKT_BURST]; -uint16_t complete_count;
> > -
> > -complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> > -qid, p_cpl, MAX_PKT_BURST);
> > -vdev->nr_async_pkts -= complete_count;
> > -if (complete_count)
> > -free_pkts(p_cpl, complete_count);
> > -}
> > -
> >  static __rte_always_inline void
> >  drain_eth_rx(struct vhost_dev *vdev)
> >  {
> > @@ -1095,9 +1170,6 @@ drain_eth_rx(struct vhost_dev *vdev)  rx_count
> =
> > rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
> >      pkts, MAX_PKT_BURST);
> >
> > -while (likely(vdev->nr_async_pkts))
> > -complete_async_pkts(vdev, VIRTIO_RXQ);
> > -
> >  if (!rx_count)
> >  return;
> >
> > @@ -1123,17 +1195,31 @@ drain_eth_rx(struct vhost_dev *vdev)
> > enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,  pkts, rx_count);  }
> > else if (async_vhost_driver) {
> > +uint32_t cpu_cpl_nr = 0;
> > +uint16_t enqueue_fail = 0;
> > +struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
> > +
> > +complete_async_pkts(vdev);
> >  enqueue_count = rte_vhost_submit_enqueue_burst(vdev-
> > >vid,
> > -VIRTIO_RXQ, pkts, rx_count);
> > -vdev->nr_async_pkts += enqueue_count;
> > +VIRTIO_RXQ, pkts, rx_count,
> > +m_cpu_cpl, &cpu_cpl_nr);
> > +atomic_fetch_add(&vdev->nr_async_pkts,
> > +enqueue_count - cpu_cpl_nr);
> > +if (cpu_cpl_nr)
> > +free_pkts(m_cpu_cpl, cpu_cpl_nr);
> > +
> > +enqueue_fail = rx_count - enqueue_count; if (enqueue_fail)
> > +free_pkts(&pkts[enqueue_count], enqueue_fail);
> > +
> >  } else {
> >  enqueue_count = rte_vhost_enqueue_burst(vdev->vid,
> > VIRTIO_RXQ,
> >  pkts, rx_count);
> >  }
> >
> >  if (enable_stats) {
> > -rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
> > -rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
> > +atomic_fetch_add(&vdev->stats.rx_total_atomic, rx_count);
> > +atomic_fetch_add(&vdev->stats.rx_atomic, enqueue_count);
> >  }
> >
> >  if (!async_vhost_driver)
> > @@ -1202,7 +1288,7 @@ switch_worker(void *arg __rte_unused)
> >
> >  while(1) {
> >  drain_mbuf_table(tx_q);
> > -
> > +drain_vhost_table();
> >  /*
> >   * Inform the configuration core that we have exited the
> >   * linked list and that no devices are in use if requested.
> > @@ -1298,6 +1384,7 @@ static int
> >  new_device(int vid)
> >  {
> >  int lcore, core_add = 0;
> > +uint16_t i;
> >  uint32_t device_num_min = num_devices;  struct vhost_dev *vdev;  vdev
> > = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE); @@
> > -1309,6 +1396,13 @@ new_device(int vid)  }  vdev->vid = vid;
> >
> > +for (i = 0; i < RTE_MAX_LCORE; i++) { vhost_txbuff[i *
> > +MAX_VHOST_DEVICE + vid] = rte_zmalloc("vhost bufftable",
> > +sizeof(struct vhost_bufftable), RTE_CACHE_LINE_SIZE);
> 
> rte_zmalloc() may fail. Need to handle the failure.

Sure, I will add failure handler in the next version.

Thanks.
Cheng

> 
> > +}
> > +
> >  if (builtin_net_driver)
> >  vs_vhost_net_setup(vdev);
> >
> > @@ -1343,12 +1437,15 @@ new_device(int vid)  if (async_vhost_driver) {
> > struct rte_vhost_async_features f;  struct rte_vhost_async_channel_ops
> > channel_ops;
> > +
> >  if (strncmp(dma_type, "ioat", 4) == 0) {  channel_ops.transfer_data =
> > ioat_transfer_data_cb;  channel_ops.check_completed_copies =
> > ioat_check_completed_copies_cb;
> > +
> >  f.async_inorder = 1;
> >  f.async_threshold = 256;
> > +
> >  return rte_vhost_async_channel_register(vid,
> > VIRTIO_RXQ,
> >  f.intval, &channel_ops);
> >  }
> > @@ -1392,8 +1489,8 @@ print_stats(__rte_unused void *arg)
> >  tx         = vdev->stats.tx;
> >  tx_dropped = tx_total - tx;
> >
> > -rx_total   = rte_atomic64_read(&vdev-
> > >stats.rx_total_atomic);
> > -rx         = rte_atomic64_read(&vdev->stats.rx_atomic);
> > +rx_total   = atomic_load(&vdev-
> > >stats.rx_total_atomic);
> > +rx         = atomic_load(&vdev->stats.rx_atomic);
> >  rx_dropped = rx_total - rx;
> >
> >  printf("Statistics for device %d\n"
> > @@ -1592,6 +1689,7 @@ main(int argc, char *argv[])
> >  /* Register vhost user driver to handle vhost messages. */  for (i =
> > 0; i < nb_sockets; i++) {  char *file = socket_files + i * PATH_MAX;
> > +
> >  if (async_vhost_driver)
> >  flags = flags | RTE_VHOST_USER_ASYNC_COPY;
> >
> > diff --git a/examples/vhost/main.h b/examples/vhost/main.h index
> > 4317b6ae8..6aa798a3e 100644
> > --- a/examples/vhost/main.h
> > +++ b/examples/vhost/main.h
> > @@ -8,6 +8,7 @@
> >  #include <sys/queue.h>
> >
> >  #include <rte_ether.h>
> > +#include <stdatomic.h>
> >
> >  /* Macros for printing using RTE_LOG */  #define
> > RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 @@ -21,8 +22,8 @@
> enum
> > {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};  struct device_statistics {
> > uint64_ttx;  uint64_ttx_total; -rte_atomic64_trx_atomic;
> > -rte_atomic64_trx_total_atomic;
> > +atomic_int_least64_trx_atomic;
> > +atomic_int_least64_trx_total_atomic;
> >  };
> >
> >  struct vhost_queue {
> > @@ -51,7 +52,7 @@ struct vhost_dev {
> >  uint64_t features;
> >  size_t hdr_len;
> >  uint16_t nr_vrings;
> > -uint16_t nr_async_pkts;
> > +atomic_int_least16_t nr_async_pkts;
> >  struct rte_vhost_memory *mem;
> >  struct device_statistics stats;
> >  TAILQ_ENTRY(vhost_dev) global_vdev_entry;
> > --
> > 2.29.2
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v6 0/2] examples/vhost: sample code refactor
  2020-12-18 11:33 [dpdk-dev] [PATCH v1 0/3] examples/vhost: sample code refactor Cheng Jiang
                   ` (6 preceding siblings ...)
  2020-12-28  7:16 ` [dpdk-dev] [PATCH v5 0/2] examples/vhost: sample code refactor Cheng Jiang
@ 2021-01-04  4:57 ` Cheng Jiang
  2021-01-04  4:57   ` [dpdk-dev] [PATCH v6 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
  2021-01-04  4:57   ` [dpdk-dev] [PATCH v6 2/2] examples/vhost: refactor vhost data path Cheng Jiang
  2021-01-05  2:15 ` [dpdk-dev] [PATCH v7 0/2] examples/vhost: sample code refactor Cheng Jiang
                   ` (2 subsequent siblings)
  10 siblings, 2 replies; 44+ messages in thread
From: Cheng Jiang @ 2021-01-04  4:57 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang, Cheng Jiang

Refactor the vhost sample code. Add an ioat ring space count and check
in the ioat callback, optimize the vhost data path for batch enqueue,
replace rte_atomicNN_xxx with atomic_XXX, and refactor the vhost async
data path.
---
v6:
 * adjusted the value of MAX_ENQUEUED_SIZE in ioat.h

v5:
 * added freeing of the vhost enqueue buffers when a vhost device is destroyed
 * added a failure handler for rte_ioat_completed_ops()
 * changed the behavior of the drain_vhost_table() function
 * changed some variable names
 * changed some variable definitions
 * added a failure handler for rte_zmalloc()
 * added some comments
 * fixed some typos

v4:
 * improved code structure
 * improved vhost enqueue buffer memory allocation
 * cleaned some codes

v3:
 * added some variable initialization
 * cleaned some codes

v2:
 * optimized patch structure
 * optimized git log
 * replaced rte_atomicNN_xxx with atomic_XXX

Cheng Jiang (2):
  examples/vhost: add ioat ring space count and check
  examples/vhost: refactor vhost data path

 examples/vhost/ioat.c |  22 +++--
 examples/vhost/ioat.h |   2 +-
 examples/vhost/main.c | 214 ++++++++++++++++++++++++++++++++----------
 examples/vhost/main.h |   7 +-
 4 files changed, 179 insertions(+), 66 deletions(-)

--
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v6 1/2] examples/vhost: add ioat ring space count and check
  2021-01-04  4:57 ` [dpdk-dev] [PATCH v6 0/2] examples/vhost: sample code refactor Cheng Jiang
@ 2021-01-04  4:57   ` Cheng Jiang
  2021-01-05  1:19     ` Hu, Jiayu
  2021-01-04  4:57   ` [dpdk-dev] [PATCH v6 2/2] examples/vhost: refactor vhost data path Cheng Jiang
  1 sibling, 1 reply; 44+ messages in thread
From: Cheng Jiang @ 2021-01-04  4:57 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang, Cheng Jiang

Add an ioat ring space count and check: if the ioat ring space is not
enough for the next async vhost packet enqueue, just return to prevent
an enqueue failure. Add a failure handler for rte_ioat_completed_ops().

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/ioat.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 71d8a1f1f5..679d1e2f58 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -17,6 +17,7 @@ struct packet_tracker {
 	unsigned short next_read;
 	unsigned short next_write;
 	unsigned short last_remain;
+	unsigned short ioat_space;
 };
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
@@ -113,7 +114,7 @@ open_ioat(const char *value)
 			goto out;
 		}
 		rte_rawdev_start(dev_id);
-
+		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
 		dma_info->nr++;
 		i++;
 	}
@@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 			src = descs[i_desc].src;
 			dst = descs[i_desc].dst;
 			i_seg = 0;
+			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
+				break;
 			while (i_seg < src->nr_segs) {
-				/*
-				 * TODO: Assuming that the ring space of the
-				 * IOAT device is large enough, so there is no
-				 * error here, and the actual error handling
-				 * will be added later.
-				 */
 				rte_ioat_enqueue_copy(dev_id,
 					(uintptr_t)(src->iov[i_seg].iov_base)
 						+ src->offset,
@@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 				i_seg++;
 			}
 			write &= mask;
-			cb_tracker[dev_id].size_track[write] = i_seg;
+			cb_tracker[dev_id].size_track[write] = src->nr_segs;
+			cb_tracker[dev_id].ioat_space -= src->nr_segs;
 			write++;
 		}
 	} else {
@@ -178,17 +176,21 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 {
 	if (!opaque_data) {
 		uintptr_t dump[255];
-		unsigned short n_seg;
+		int n_seg;
 		unsigned short read, write;
 		unsigned short nb_packet = 0;
 		unsigned short mask = MAX_ENQUEUED_SIZE - 1;
 		unsigned short i;
+
 		int dev_id = dma_bind[vid].dmas[queue_id * 2
 				+ VIRTIO_RXQ].dev_id;
 		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
-		n_seg += cb_tracker[dev_id].last_remain;
 		if (!n_seg)
 			return 0;
+
+		cb_tracker[dev_id].ioat_space += n_seg;
+		n_seg += cb_tracker[dev_id].last_remain;
+
 		read = cb_tracker[dev_id].next_read;
 		write = cb_tracker[dev_id].next_write;
 		for (i = 0; i < max_packets; i++) {
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v6 2/2] examples/vhost: refactor vhost data path
  2021-01-04  4:57 ` [dpdk-dev] [PATCH v6 0/2] examples/vhost: sample code refactor Cheng Jiang
  2021-01-04  4:57   ` [dpdk-dev] [PATCH v6 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
@ 2021-01-04  4:57   ` Cheng Jiang
  2021-01-05  1:43     ` Hu, Jiayu
  1 sibling, 1 reply; 44+ messages in thread
From: Cheng Jiang @ 2021-01-04  4:57 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang, Cheng Jiang

Change the vm2vm data path to batch enqueue for better performance.
Support the latest async vhost API, refactor the vhost async data path,
replace rte_atomicNN_xxx with atomic_XXX, and clean up some code.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/ioat.h |   2 +-
 examples/vhost/main.c | 214 ++++++++++++++++++++++++++++++++----------
 examples/vhost/main.h |   7 +-
 3 files changed, 167 insertions(+), 56 deletions(-)

diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index d6e1e2e079..0a1dbb8117 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -11,7 +11,7 @@
 
 #define MAX_VHOST_DEVICE 1024
 #define IOAT_RING_SIZE 4096
-#define MAX_ENQUEUED_SIZE 256
+#define MAX_ENQUEUED_SIZE 512
 
 struct dma_info {
 	struct rte_pci_addr addr;
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 8d8c3038bf..45976c93c7 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -179,9 +179,22 @@ struct mbuf_table {
 	struct rte_mbuf *m_table[MAX_PKT_BURST];
 };
 
+struct vhost_bufftable {
+	uint32_t len;
+	uint64_t pre_tsc;
+	struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
 /* TX queue for each data core. */
 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
 
+/*
+ * Vhost TX buffer for each data core.
+ * Every data core maintains a TX buffer for every vhost device,
+ * which is used for batch pkts enqueue for higher performance.
+ */
+struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * MAX_VHOST_DEVICE];
+
 #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
 				 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN       4
@@ -804,39 +817,112 @@ unlink_vmdq(struct vhost_dev *vdev)
 	}
 }
 
+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+	while (n--)
+		rte_pktmbuf_free(pkts[n]);
+}
+
 static __rte_always_inline void
-virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
+complete_async_pkts(struct vhost_dev *vdev)
+{
+	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+	uint16_t complete_count;
+
+	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
+	if (complete_count) {
+		atomic_fetch_sub(&vdev->nr_async_pkts, complete_count);
+		free_pkts(p_cpl, complete_count);
+	}
+}
+
+static __rte_always_inline void
+sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
 {
 	uint16_t ret;
-	struct rte_mbuf *m_cpl[1];
 
 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
-	} else if (async_vhost_driver) {
-		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
-						&m, 1);
-
-		if (likely(ret))
-			dst_vdev->nr_async_pkts++;
-
-		while (likely(dst_vdev->nr_async_pkts)) {
-			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
-					VIRTIO_RXQ, m_cpl, 1))
-				dst_vdev->nr_async_pkts--;
-		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
 	}
 
 	if (enable_stats) {
-		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
-		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
+		atomic_fetch_add(&dst_vdev->stats.rx_total_atomic, 1);
+		atomic_fetch_add(&dst_vdev->stats.rx_atomic, ret);
 		src_vdev->stats.tx_total++;
 		src_vdev->stats.tx += ret;
 	}
 }
 
+static __rte_always_inline void
+drain_vhost(struct vhost_dev *vdev)
+{
+	uint16_t ret;
+	uint64_t buff_idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
+	uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
+	struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
+
+	if (builtin_net_driver) {
+		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
+	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[nr_xmit];
+
+		complete_async_pkts(vdev);
+		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+					m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts, ret - cpu_cpl_nr);
+
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+
+		enqueue_fail = nr_xmit - ret;
+		if (enqueue_fail)
+			free_pkts(&m[ret], nr_xmit - ret);
+	} else {
+		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+						m, nr_xmit);
+	}
+
+	if (enable_stats) {
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, nr_xmit);
+		atomic_fetch_add(&vdev->stats.rx_atomic, ret);
+	}
+
+	if (!async_vhost_driver)
+		free_pkts(m, nr_xmit);
+}
+
+static __rte_always_inline void
+drain_vhost_table(void)
+{
+	uint16_t lcore_id = rte_lcore_id();
+	struct vhost_bufftable *vhost_txq;
+	struct vhost_dev *vdev;
+	uint64_t cur_tsc;
+
+	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+		vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE
+						+ vdev->vid];
+
+		cur_tsc = rte_rdtsc();
+		if (unlikely(cur_tsc - vhost_txq->pre_tsc
+				> MBUF_TABLE_DRAIN_TSC)) {
+			RTE_LOG_DP(DEBUG, VHOST_DATA,
+				"Vhost TX queue drained after timeout with burst size %u\n",
+				vhost_txq->len);
+			drain_vhost(vdev);
+			vhost_txq->len = 0;
+			vhost_txq->pre_tsc = cur_tsc;
+		}
+	}
+}
+
 /*
  * Check if the packet destination MAC address is for a local device. If so then put
  * the packet on that devices RX queue. If not then return.
@@ -846,7 +932,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
 	struct rte_ether_hdr *pkt_hdr;
 	struct vhost_dev *dst_vdev;
-
+	struct vhost_bufftable *vhost_txq;
+	uint16_t lcore_id = rte_lcore_id();
 	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
 	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
@@ -869,7 +956,19 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 		return 0;
 	}
 
-	virtio_xmit(dst_vdev, vdev, m);
+	vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE + dst_vdev->vid];
+	vhost_txq->m_table[vhost_txq->len++] = m;
+
+	if (enable_stats) {
+		vdev->stats.tx_total++;
+		vdev->stats.tx++;
+	}
+
+	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
+		drain_vhost(dst_vdev);
+		vhost_txq->len = 0;
+		vhost_txq->pre_tsc = rte_rdtsc();
+	}
 	return 0;
 }
 
@@ -940,13 +1039,6 @@ static void virtio_tx_offload(struct rte_mbuf *m)
 	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
 }
 
-static inline void
-free_pkts(struct rte_mbuf **pkts, uint16_t n)
-{
-	while (n--)
-		rte_pktmbuf_free(pkts[n]);
-}
-
 static __rte_always_inline void
 do_drain_mbuf_table(struct mbuf_table *tx_q)
 {
@@ -979,16 +1071,14 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 
 		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
 			if (vdev2 != vdev)
-				virtio_xmit(vdev2, vdev, m);
+				sync_virtio_xmit(vdev2, vdev, m);
 		}
 		goto queue2nic;
 	}
 
 	/*check if destination is local VM*/
-	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
-		rte_pktmbuf_free(m);
+	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0))
 		return;
-	}
 
 	if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
 		if (unlikely(find_local_dest(vdev, m, &offset,
@@ -1073,19 +1163,6 @@ drain_mbuf_table(struct mbuf_table *tx_q)
 	}
 }
 
-static __rte_always_inline void
-complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
-{
-	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
-	uint16_t complete_count;
-
-	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
-						qid, p_cpl, MAX_PKT_BURST);
-	vdev->nr_async_pkts -= complete_count;
-	if (complete_count)
-		free_pkts(p_cpl, complete_count);
-}
-
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
@@ -1095,9 +1172,6 @@ drain_eth_rx(struct vhost_dev *vdev)
 	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
 				    pkts, MAX_PKT_BURST);
 
-	while (likely(vdev->nr_async_pkts))
-		complete_async_pkts(vdev, VIRTIO_RXQ);
-
 	if (!rx_count)
 		return;
 
@@ -1123,17 +1197,31 @@ drain_eth_rx(struct vhost_dev *vdev)
 		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
 						pkts, rx_count);
 	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
+
+		complete_async_pkts(vdev);
 		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
-					VIRTIO_RXQ, pkts, rx_count);
-		vdev->nr_async_pkts += enqueue_count;
+					VIRTIO_RXQ, pkts, rx_count,
+					m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts,
+					enqueue_count - cpu_cpl_nr);
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+
+		enqueue_fail = rx_count - enqueue_count;
+		if (enqueue_fail)
+			free_pkts(&pkts[enqueue_count], enqueue_fail);
+
 	} else {
 		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
 						pkts, rx_count);
 	}
 
 	if (enable_stats) {
-		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
-		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, rx_count);
+		atomic_fetch_add(&vdev->stats.rx_atomic, enqueue_count);
 	}
 
 	if (!async_vhost_driver)
@@ -1202,7 +1290,7 @@ switch_worker(void *arg __rte_unused)
 
 	while(1) {
 		drain_mbuf_table(tx_q);
-
+		drain_vhost_table();
 		/*
 		 * Inform the configuration core that we have exited the
 		 * linked list and that no devices are in use if requested.
@@ -1243,6 +1331,7 @@ destroy_device(int vid)
 {
 	struct vhost_dev *vdev = NULL;
 	int lcore;
+	uint16_t i;
 
 	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
 		if (vdev->vid == vid)
@@ -1256,6 +1345,9 @@ destroy_device(int vid)
 		rte_pause();
 	}
 
+	for (i = 0; i < RTE_MAX_LCORE; i++)
+		rte_free(vhost_txbuff[i * MAX_VHOST_DEVICE + vid]);
+
 	if (builtin_net_driver)
 		vs_vhost_net_remove(vdev);
 
@@ -1298,6 +1390,7 @@ static int
 new_device(int vid)
 {
 	int lcore, core_add = 0;
+	uint16_t i;
 	uint32_t device_num_min = num_devices;
 	struct vhost_dev *vdev;
 	vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
@@ -1309,6 +1402,19 @@ new_device(int vid)
 	}
 	vdev->vid = vid;
 
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		vhost_txbuff[i * MAX_VHOST_DEVICE + vid]
+			= rte_zmalloc("vhost bufftable",
+				sizeof(struct vhost_bufftable),
+				RTE_CACHE_LINE_SIZE);
+
+		if (vhost_txbuff[i * MAX_VHOST_DEVICE + vid] == NULL) {
+			RTE_LOG(INFO, VHOST_DATA,
+			  "(%d) couldn't allocate memory for vhost TX\n", vid);
+			return -1;
+		}
+	}
+
 	if (builtin_net_driver)
 		vs_vhost_net_setup(vdev);
 
@@ -1343,12 +1449,15 @@ new_device(int vid)
 	if (async_vhost_driver) {
 		struct rte_vhost_async_features f;
 		struct rte_vhost_async_channel_ops channel_ops;
+
 		if (strncmp(dma_type, "ioat", 4) == 0) {
 			channel_ops.transfer_data = ioat_transfer_data_cb;
 			channel_ops.check_completed_copies =
 				ioat_check_completed_copies_cb;
+
 			f.async_inorder = 1;
 			f.async_threshold = 256;
+
 			return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
 				f.intval, &channel_ops);
 		}
@@ -1392,8 +1501,8 @@ print_stats(__rte_unused void *arg)
 			tx         = vdev->stats.tx;
 			tx_dropped = tx_total - tx;
 
-			rx_total   = rte_atomic64_read(&vdev->stats.rx_total_atomic);
-			rx         = rte_atomic64_read(&vdev->stats.rx_atomic);
+			rx_total   = atomic_load(&vdev->stats.rx_total_atomic);
+			rx         = atomic_load(&vdev->stats.rx_atomic);
 			rx_dropped = rx_total - rx;
 
 			printf("Statistics for device %d\n"
@@ -1592,6 +1701,7 @@ main(int argc, char *argv[])
 	/* Register vhost user driver to handle vhost messages. */
 	for (i = 0; i < nb_sockets; i++) {
 		char *file = socket_files + i * PATH_MAX;
+
 		if (async_vhost_driver)
 			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
 
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 4317b6ae81..6aa798a3e2 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -8,6 +8,7 @@
 #include <sys/queue.h>
 
 #include <rte_ether.h>
+#include <stdatomic.h>
 
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
@@ -21,8 +22,8 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 struct device_statistics {
 	uint64_t	tx;
 	uint64_t	tx_total;
-	rte_atomic64_t	rx_atomic;
-	rte_atomic64_t	rx_total_atomic;
+	atomic_int_least64_t	rx_atomic;
+	atomic_int_least64_t	rx_total_atomic;
 };
 
 struct vhost_queue {
@@ -51,7 +52,7 @@ struct vhost_dev {
 	uint64_t features;
 	size_t hdr_len;
 	uint16_t nr_vrings;
-	uint16_t nr_async_pkts;
+	atomic_int_least16_t nr_async_pkts;
 	struct rte_vhost_memory *mem;
 	struct device_statistics stats;
 	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/2] examples/vhost: add ioat ring space count and check
  2021-01-04  4:57   ` [dpdk-dev] [PATCH v6 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
@ 2021-01-05  1:19     ` Hu, Jiayu
  2021-01-05  1:51       ` Jiang, Cheng1
  0 siblings, 1 reply; 44+ messages in thread
From: Hu, Jiayu @ 2021-01-05  1:19 UTC (permalink / raw)
  To: Jiang, Cheng1, maxime.coquelin, Xia, Chenbo
  Cc: dev, Yang, YvonneX, Wang, Yinan

Hi Cheng,

> -----Original Message-----
> From: Jiang, Cheng1 <cheng1.jiang@intel.com>
> Sent: Monday, January 4, 2021 12:58 PM
> To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> <yvonnex.yang@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Jiang,
> Cheng1 <cheng1.jiang@intel.com>
> Subject: [PATCH v6 1/2] examples/vhost: add ioat ring space count and check
> 
> Add ioat ring space count and check, if ioat ring space is not enough
> for the next async vhost packet enqueue, then just return to prevent
> enqueue failure. Add rte_ioat_completed_ops() fail handler.
> 
> Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
> ---
>  examples/vhost/ioat.c | 22 ++++++++++++----------
>  1 file changed, 12 insertions(+), 10 deletions(-)
> 
> diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
> index 71d8a1f1f5..679d1e2f58 100644
> --- a/examples/vhost/ioat.c
> +++ b/examples/vhost/ioat.c
> @@ -17,6 +17,7 @@ struct packet_tracker {
>  	unsigned short next_read;
>  	unsigned short next_write;
>  	unsigned short last_remain;
> +	unsigned short ioat_space;
>  };
> 
>  struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
> @@ -113,7 +114,7 @@ open_ioat(const char *value)
>  			goto out;
>  		}
>  		rte_rawdev_start(dev_id);
> -
> +		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
>  		dma_info->nr++;
>  		i++;
>  	}
> @@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
>  			src = descs[i_desc].src;
>  			dst = descs[i_desc].dst;
>  			i_seg = 0;
> +			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
> +				break;
>  			while (i_seg < src->nr_segs) {
> -				/*
> -				 * TODO: Assuming that the ring space of the
> -				 * IOAT device is large enough, so there is no
> -				 * error here, and the actual error handling
> -				 * will be added later.
> -				 */
>  				rte_ioat_enqueue_copy(dev_id,
>  					(uintptr_t)(src->iov[i_seg].iov_base)
>  						+ src->offset,
> @@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
>  				i_seg++;
>  			}
>  			write &= mask;
> -			cb_tracker[dev_id].size_track[write] = i_seg;
> +			cb_tracker[dev_id].size_track[write] = src->nr_segs;
> +			cb_tracker[dev_id].ioat_space -= src->nr_segs;
>  			write++;
>  		}
>  	} else {
> @@ -178,17 +176,21 @@ ioat_check_completed_copies_cb(int vid, uint16_t
> queue_id,
>  {
>  	if (!opaque_data) {
>  		uintptr_t dump[255];
> -		unsigned short n_seg;
> +		int n_seg;
>  		unsigned short read, write;
>  		unsigned short nb_packet = 0;
>  		unsigned short mask = MAX_ENQUEUED_SIZE - 1;
>  		unsigned short i;
> +
>  		int dev_id = dma_bind[vid].dmas[queue_id * 2
>  				+ VIRTIO_RXQ].dev_id;
>  		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
> -		n_seg += cb_tracker[dev_id].last_remain;
>  		if (!n_seg)
>  			return 0;
> +
> +		cb_tracker[dev_id].ioat_space += n_seg;
> +		n_seg += cb_tracker[dev_id].last_remain;

When an error occurs in rte_ioat_completed_ops(), n_seg is -1, so
"!n_seg" evaluates to false (0). The early "return 0" is therefore
skipped and the error value still gets past the "if" check.

Thanks,
Jiayu
> +
>  		read = cb_tracker[dev_id].next_read;
>  		write = cb_tracker[dev_id].next_write;
>  		for (i = 0; i < max_packets; i++) {
> --
> 2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v6 2/2] examples/vhost: refactor vhost data path
  2021-01-04  4:57   ` [dpdk-dev] [PATCH v6 2/2] examples/vhost: refactor vhost data path Cheng Jiang
@ 2021-01-05  1:43     ` Hu, Jiayu
  0 siblings, 0 replies; 44+ messages in thread
From: Hu, Jiayu @ 2021-01-05  1:43 UTC (permalink / raw)
  To: Jiang, Cheng1, maxime.coquelin, Xia, Chenbo
  Cc: dev, Yang, YvonneX, Wang, Yinan


> -----Original Message-----
> From: Jiang, Cheng1 <cheng1.jiang@intel.com>
> Sent: Monday, January 4, 2021 12:58 PM
> To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> <yvonnex.yang@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Jiang,
> Cheng1 <cheng1.jiang@intel.com>
> Subject: [PATCH v6 2/2] examples/vhost: refactor vhost data path
> 
> Change the vm2vm data path to batch enqueue for better performance.
> Support latest async vhost API, refactor vhost async data path,
> replace rte_atomicNN_xxx to atomic_XXX and clean some codes.
> 
> Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
> ---
>  examples/vhost/ioat.h |   2 +-
>  examples/vhost/main.c | 214 ++++++++++++++++++++++++++++++++----------
>  examples/vhost/main.h |   7 +-
>  3 files changed, 167 insertions(+), 56 deletions(-)
> 
Reviewed-by: Jiayu Hu <jiayu.hu@intel.com>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/2] examples/vhost: add ioat ring space count and check
  2021-01-05  1:19     ` Hu, Jiayu
@ 2021-01-05  1:51       ` Jiang, Cheng1
  0 siblings, 0 replies; 44+ messages in thread
From: Jiang, Cheng1 @ 2021-01-05  1:51 UTC (permalink / raw)
  To: Hu, Jiayu, maxime.coquelin, Xia, Chenbo; +Cc: dev, Yang, YvonneX, Wang, Yinan

Hi Jiayu,

> -----Original Message-----
> From: Hu, Jiayu <jiayu.hu@intel.com>
> Sent: Tuesday, January 5, 2021 9:20 AM
> To: Jiang, Cheng1 <cheng1.jiang@intel.com>; maxime.coquelin@redhat.com;
> Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Yang, YvonneX <yvonnex.yang@intel.com>; Wang, Yinan
> <yinan.wang@intel.com>
> Subject: RE: [PATCH v6 1/2] examples/vhost: add ioat ring space count and
> check
> 
> Hi Cheng,
> 
> > -----Original Message-----
> > From: Jiang, Cheng1 <cheng1.jiang@intel.com>
> > Sent: Monday, January 4, 2021 12:58 PM
> > To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> > Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> > <yvonnex.yang@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Jiang,
> > Cheng1 <cheng1.jiang@intel.com>
> > Subject: [PATCH v6 1/2] examples/vhost: add ioat ring space count and
> > check
> >
> > Add ioat ring space count and check, if ioat ring space is not enough
> > for the next async vhost packet enqueue, then just return to prevent
> > enqueue failure. Add rte_ioat_completed_ops() fail handler.
> >
> > Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
> > ---
> >  examples/vhost/ioat.c | 22 ++++++++++++----------
> >  1 file changed, 12 insertions(+), 10 deletions(-)
> >
> > diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c index
> > 71d8a1f1f5..679d1e2f58 100644
> > --- a/examples/vhost/ioat.c
> > +++ b/examples/vhost/ioat.c
> > @@ -17,6 +17,7 @@ struct packet_tracker {  unsigned short next_read;
> > unsigned short next_write;  unsigned short last_remain;
> > +unsigned short ioat_space;
> >  };
> >
> >  struct packet_tracker cb_tracker[MAX_VHOST_DEVICE]; @@ -113,7 +114,7
> > @@ open_ioat(const char *value)  goto out;  }
> > rte_rawdev_start(dev_id);
> > -
> > +cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
> >  dma_info->nr++;
> >  i++;
> >  }
> > @@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
> > src = descs[i_desc].src;  dst = descs[i_desc].dst;  i_seg = 0;
> > +if (cb_tracker[dev_id].ioat_space < src->nr_segs) break;
> >  while (i_seg < src->nr_segs) {
> > -/*
> > - * TODO: Assuming that the ring space of the
> > - * IOAT device is large enough, so there is no
> > - * error here, and the actual error handling
> > - * will be added later.
> > - */
> >  rte_ioat_enqueue_copy(dev_id,
> >  (uintptr_t)(src->iov[i_seg].iov_base)
> >  + src->offset,
> > @@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
> > i_seg++;  }  write &= mask; -cb_tracker[dev_id].size_track[write] =
> > i_seg;
> > +cb_tracker[dev_id].size_track[write] = src->nr_segs;
> > +cb_tracker[dev_id].ioat_space -= src->nr_segs;
> >  write++;
> >  }
> >  } else {
> > @@ -178,17 +176,21 @@ ioat_check_completed_copies_cb(int vid,
> uint16_t
> > queue_id,  {  if (!opaque_data) {  uintptr_t dump[255]; -unsigned
> > short n_seg;
> > +int n_seg;
> >  unsigned short read, write;
> >  unsigned short nb_packet = 0;
> >  unsigned short mask = MAX_ENQUEUED_SIZE - 1;  unsigned short i;
> > +
> >  int dev_id = dma_bind[vid].dmas[queue_id * 2  + VIRTIO_RXQ].dev_id;
> > n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump); -n_seg +=
> > cb_tracker[dev_id].last_remain;  if (!n_seg)  return 0;
> > +
> > +cb_tracker[dev_id].ioat_space += n_seg; n_seg +=
> > +cb_tracker[dev_id].last_remain;
> 
> When error happens in rte_ioat_completed_ops(), where n_seg is -1, the
> value of "!n_seg" is false (0) and it can still pass the check of "if".
> 

You are right, I missed that case.
I will fix it in the next version. Thanks a lot.

Cheng

> Thanks,
> Jiayu
> > +
> >  read = cb_tracker[dev_id].next_read;
> >  write = cb_tracker[dev_id].next_write;  for (i = 0; i < max_packets;
> > i++) {
> > --
> > 2.29.2
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v7 0/2] examples/vhost: sample code refactor
  2020-12-18 11:33 [dpdk-dev] [PATCH v1 0/3] examples/vhost: sample code refactor Cheng Jiang
                   ` (7 preceding siblings ...)
  2021-01-04  4:57 ` [dpdk-dev] [PATCH v6 0/2] examples/vhost: sample code refactor Cheng Jiang
@ 2021-01-05  2:15 ` Cheng Jiang
  2021-01-05  2:15   ` [dpdk-dev] [PATCH v7 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
                     ` (2 more replies)
  2021-01-11  5:52 ` [dpdk-dev] [PATCH v8 " Cheng Jiang
  2021-01-12  4:38 ` [dpdk-dev] [PATCH v9 0/2] examples/vhost: sample code refactor Cheng Jiang
  10 siblings, 3 replies; 44+ messages in thread
From: Cheng Jiang @ 2021-01-05  2:15 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang, Cheng Jiang

Refactor the vhost sample code: add an ioat ring space count and check
in the ioat callback, optimize the vhost data path for batch enqueue,
replace rte_atomicNN_xxx with atomic_XXX, and refactor the vhost async
data path.
---
v7:
 * fixed rte_ioat_completed_ops() fail handler issue

v6:
 * adjusted the value of MAX_ENQUEUED_SIZE in ioat.h

v5:
 * added freeing of the vhost enqueue buffers when a vhost device is destroyed
 * added rte_ioat_completed_ops() fail handler
 * changed the behavior of drain_vhost_table() function
 * changed some variable names
 * changed some variable definition
 * added rte_zmalloc() fail handler
 * added some comments
 * fixed some typos

v4:
 * improved code structure
 * improved vhost enqueue buffer memory allocation
 * cleaned some codes

v3:
 * added some variable initialization
 * cleaned some codes

v2:
 * optimized patch structure
 * optimized git log
 * replaced rte_atomicNN_xxx to atomic_XXX

Cheng Jiang (2):
  examples/vhost: add ioat ring space count and check
  examples/vhost: refactor vhost data path

 examples/vhost/ioat.c |  24 ++---
 examples/vhost/ioat.h |   2 +-
 examples/vhost/main.c | 214 ++++++++++++++++++++++++++++++++----------
 examples/vhost/main.h |   7 +-
 4 files changed, 180 insertions(+), 67 deletions(-)

--
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v7 1/2] examples/vhost: add ioat ring space count and check
  2021-01-05  2:15 ` [dpdk-dev] [PATCH v7 0/2] examples/vhost: sample code refactor Cheng Jiang
@ 2021-01-05  2:15   ` Cheng Jiang
  2021-01-05  6:56     ` Hu, Jiayu
  2021-01-05  2:15   ` [dpdk-dev] [PATCH v7 2/2] examples/vhost: refactor vhost data path Cheng Jiang
  2021-01-06  7:47   ` [dpdk-dev] [PATCH v7 0/2] examples/vhost: sample code refactor Ling, WeiX
  2 siblings, 1 reply; 44+ messages in thread
From: Cheng Jiang @ 2021-01-05  2:15 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang, Cheng Jiang

Add an ioat ring space count and check: if the ioat ring does not have
enough space for the next async vhost packet enqueue, return early to
prevent an enqueue failure. Add a failure handler for
rte_ioat_completed_ops().

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/ioat.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 71d8a1f1f5..dbad28d43e 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -17,6 +17,7 @@ struct packet_tracker {
 	unsigned short next_read;
 	unsigned short next_write;
 	unsigned short last_remain;
+	unsigned short ioat_space;
 };
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
@@ -113,7 +114,7 @@ open_ioat(const char *value)
 			goto out;
 		}
 		rte_rawdev_start(dev_id);
-
+		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
 		dma_info->nr++;
 		i++;
 	}
@@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 			src = descs[i_desc].src;
 			dst = descs[i_desc].dst;
 			i_seg = 0;
+			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
+				break;
 			while (i_seg < src->nr_segs) {
-				/*
-				 * TODO: Assuming that the ring space of the
-				 * IOAT device is large enough, so there is no
-				 * error here, and the actual error handling
-				 * will be added later.
-				 */
 				rte_ioat_enqueue_copy(dev_id,
 					(uintptr_t)(src->iov[i_seg].iov_base)
 						+ src->offset,
@@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 				i_seg++;
 			}
 			write &= mask;
-			cb_tracker[dev_id].size_track[write] = i_seg;
+			cb_tracker[dev_id].size_track[write] = src->nr_segs;
+			cb_tracker[dev_id].ioat_space -= src->nr_segs;
 			write++;
 		}
 	} else {
@@ -178,17 +176,21 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 {
 	if (!opaque_data) {
 		uintptr_t dump[255];
-		unsigned short n_seg;
+		int n_seg;
 		unsigned short read, write;
 		unsigned short nb_packet = 0;
 		unsigned short mask = MAX_ENQUEUED_SIZE - 1;
 		unsigned short i;
+
 		int dev_id = dma_bind[vid].dmas[queue_id * 2
 				+ VIRTIO_RXQ].dev_id;
 		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
-		n_seg += cb_tracker[dev_id].last_remain;
-		if (!n_seg)
+		if (n_seg <= 0)
 			return 0;
+
+		cb_tracker[dev_id].ioat_space += n_seg;
+		n_seg += cb_tracker[dev_id].last_remain;
+
 		read = cb_tracker[dev_id].next_read;
 		write = cb_tracker[dev_id].next_write;
 		for (i = 0; i < max_packets; i++) {
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v7 2/2] examples/vhost: refactor vhost data path
  2021-01-05  2:15 ` [dpdk-dev] [PATCH v7 0/2] examples/vhost: sample code refactor Cheng Jiang
  2021-01-05  2:15   ` [dpdk-dev] [PATCH v7 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
@ 2021-01-05  2:15   ` Cheng Jiang
  2021-01-06  7:47   ` [dpdk-dev] [PATCH v7 0/2] examples/vhost: sample code refactor Ling, WeiX
  2 siblings, 0 replies; 44+ messages in thread
From: Cheng Jiang @ 2021-01-05  2:15 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang, Cheng Jiang

Change the vm2vm data path to batch enqueue for better performance.
Support the latest async vhost API, refactor the vhost async data path,
replace rte_atomicNN_xxx with atomic_XXX, and clean up some code.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/ioat.h |   2 +-
 examples/vhost/main.c | 214 ++++++++++++++++++++++++++++++++----------
 examples/vhost/main.h |   7 +-
 3 files changed, 167 insertions(+), 56 deletions(-)

diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index d6e1e2e079..0a1dbb8117 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -11,7 +11,7 @@
 
 #define MAX_VHOST_DEVICE 1024
 #define IOAT_RING_SIZE 4096
-#define MAX_ENQUEUED_SIZE 256
+#define MAX_ENQUEUED_SIZE 512
 
 struct dma_info {
 	struct rte_pci_addr addr;
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 8d8c3038bf..45976c93c7 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -179,9 +179,22 @@ struct mbuf_table {
 	struct rte_mbuf *m_table[MAX_PKT_BURST];
 };
 
+struct vhost_bufftable {
+	uint32_t len;
+	uint64_t pre_tsc;
+	struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
 /* TX queue for each data core. */
 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
 
+/*
+ * Vhost TX buffer for each data core.
+ * Every data core maintains a TX buffer for every vhost device,
+ * which is used for batch pkts enqueue for higher performance.
+ */
+struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * MAX_VHOST_DEVICE];
+
 #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
 				 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN       4
@@ -804,39 +817,112 @@ unlink_vmdq(struct vhost_dev *vdev)
 	}
 }
 
+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+	while (n--)
+		rte_pktmbuf_free(pkts[n]);
+}
+
 static __rte_always_inline void
-virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
+complete_async_pkts(struct vhost_dev *vdev)
+{
+	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+	uint16_t complete_count;
+
+	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
+	if (complete_count) {
+		atomic_fetch_sub(&vdev->nr_async_pkts, complete_count);
+		free_pkts(p_cpl, complete_count);
+	}
+}
+
+static __rte_always_inline void
+sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
 {
 	uint16_t ret;
-	struct rte_mbuf *m_cpl[1];
 
 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
-	} else if (async_vhost_driver) {
-		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
-						&m, 1);
-
-		if (likely(ret))
-			dst_vdev->nr_async_pkts++;
-
-		while (likely(dst_vdev->nr_async_pkts)) {
-			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
-					VIRTIO_RXQ, m_cpl, 1))
-				dst_vdev->nr_async_pkts--;
-		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
 	}
 
 	if (enable_stats) {
-		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
-		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
+		atomic_fetch_add(&dst_vdev->stats.rx_total_atomic, 1);
+		atomic_fetch_add(&dst_vdev->stats.rx_atomic, ret);
 		src_vdev->stats.tx_total++;
 		src_vdev->stats.tx += ret;
 	}
 }
 
+static __rte_always_inline void
+drain_vhost(struct vhost_dev *vdev)
+{
+	uint16_t ret;
+	uint64_t buff_idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
+	uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
+	struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
+
+	if (builtin_net_driver) {
+		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
+	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[nr_xmit];
+
+		complete_async_pkts(vdev);
+		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+					m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts, ret - cpu_cpl_nr);
+
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+
+		enqueue_fail = nr_xmit - ret;
+		if (enqueue_fail)
+			free_pkts(&m[ret], nr_xmit - ret);
+	} else {
+		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+						m, nr_xmit);
+	}
+
+	if (enable_stats) {
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, nr_xmit);
+		atomic_fetch_add(&vdev->stats.rx_atomic, ret);
+	}
+
+	if (!async_vhost_driver)
+		free_pkts(m, nr_xmit);
+}
+
+static __rte_always_inline void
+drain_vhost_table(void)
+{
+	uint16_t lcore_id = rte_lcore_id();
+	struct vhost_bufftable *vhost_txq;
+	struct vhost_dev *vdev;
+	uint64_t cur_tsc;
+
+	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+		vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE
+						+ vdev->vid];
+
+		cur_tsc = rte_rdtsc();
+		if (unlikely(cur_tsc - vhost_txq->pre_tsc
+				> MBUF_TABLE_DRAIN_TSC)) {
+			RTE_LOG_DP(DEBUG, VHOST_DATA,
+				"Vhost TX queue drained after timeout with burst size %u\n",
+				vhost_txq->len);
+			drain_vhost(vdev);
+			vhost_txq->len = 0;
+			vhost_txq->pre_tsc = cur_tsc;
+		}
+	}
+}
+
 /*
  * Check if the packet destination MAC address is for a local device. If so then put
  * the packet on that devices RX queue. If not then return.
@@ -846,7 +932,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
 	struct rte_ether_hdr *pkt_hdr;
 	struct vhost_dev *dst_vdev;
-
+	struct vhost_bufftable *vhost_txq;
+	uint16_t lcore_id = rte_lcore_id();
 	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
 	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
@@ -869,7 +956,19 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 		return 0;
 	}
 
-	virtio_xmit(dst_vdev, vdev, m);
+	vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE + dst_vdev->vid];
+	vhost_txq->m_table[vhost_txq->len++] = m;
+
+	if (enable_stats) {
+		vdev->stats.tx_total++;
+		vdev->stats.tx++;
+	}
+
+	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
+		drain_vhost(dst_vdev);
+		vhost_txq->len = 0;
+		vhost_txq->pre_tsc = rte_rdtsc();
+	}
 	return 0;
 }
 
@@ -940,13 +1039,6 @@ static void virtio_tx_offload(struct rte_mbuf *m)
 	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
 }
 
-static inline void
-free_pkts(struct rte_mbuf **pkts, uint16_t n)
-{
-	while (n--)
-		rte_pktmbuf_free(pkts[n]);
-}
-
 static __rte_always_inline void
 do_drain_mbuf_table(struct mbuf_table *tx_q)
 {
@@ -979,16 +1071,14 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 
 		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
 			if (vdev2 != vdev)
-				virtio_xmit(vdev2, vdev, m);
+				sync_virtio_xmit(vdev2, vdev, m);
 		}
 		goto queue2nic;
 	}
 
 	/*check if destination is local VM*/
-	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
-		rte_pktmbuf_free(m);
+	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0))
 		return;
-	}
 
 	if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
 		if (unlikely(find_local_dest(vdev, m, &offset,
@@ -1073,19 +1163,6 @@ drain_mbuf_table(struct mbuf_table *tx_q)
 	}
 }
 
-static __rte_always_inline void
-complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
-{
-	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
-	uint16_t complete_count;
-
-	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
-						qid, p_cpl, MAX_PKT_BURST);
-	vdev->nr_async_pkts -= complete_count;
-	if (complete_count)
-		free_pkts(p_cpl, complete_count);
-}
-
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
@@ -1095,9 +1172,6 @@ drain_eth_rx(struct vhost_dev *vdev)
 	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
 				    pkts, MAX_PKT_BURST);
 
-	while (likely(vdev->nr_async_pkts))
-		complete_async_pkts(vdev, VIRTIO_RXQ);
-
 	if (!rx_count)
 		return;
 
@@ -1123,17 +1197,31 @@ drain_eth_rx(struct vhost_dev *vdev)
 		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
 						pkts, rx_count);
 	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
+
+		complete_async_pkts(vdev);
 		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
-					VIRTIO_RXQ, pkts, rx_count);
-		vdev->nr_async_pkts += enqueue_count;
+					VIRTIO_RXQ, pkts, rx_count,
+					m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts,
+					enqueue_count - cpu_cpl_nr);
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+
+		enqueue_fail = rx_count - enqueue_count;
+		if (enqueue_fail)
+			free_pkts(&pkts[enqueue_count], enqueue_fail);
+
 	} else {
 		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
 						pkts, rx_count);
 	}
 
 	if (enable_stats) {
-		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
-		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, rx_count);
+		atomic_fetch_add(&vdev->stats.rx_atomic, enqueue_count);
 	}
 
 	if (!async_vhost_driver)
@@ -1202,7 +1290,7 @@ switch_worker(void *arg __rte_unused)
 
 	while(1) {
 		drain_mbuf_table(tx_q);
-
+		drain_vhost_table();
 		/*
 		 * Inform the configuration core that we have exited the
 		 * linked list and that no devices are in use if requested.
@@ -1243,6 +1331,7 @@ destroy_device(int vid)
 {
 	struct vhost_dev *vdev = NULL;
 	int lcore;
+	uint16_t i;
 
 	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
 		if (vdev->vid == vid)
@@ -1256,6 +1345,9 @@ destroy_device(int vid)
 		rte_pause();
 	}
 
+	for (i = 0; i < RTE_MAX_LCORE; i++)
+		rte_free(vhost_txbuff[i * MAX_VHOST_DEVICE + vid]);
+
 	if (builtin_net_driver)
 		vs_vhost_net_remove(vdev);
 
@@ -1298,6 +1390,7 @@ static int
 new_device(int vid)
 {
 	int lcore, core_add = 0;
+	uint16_t i;
 	uint32_t device_num_min = num_devices;
 	struct vhost_dev *vdev;
 	vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
@@ -1309,6 +1402,19 @@ new_device(int vid)
 	}
 	vdev->vid = vid;
 
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		vhost_txbuff[i * MAX_VHOST_DEVICE + vid]
+			= rte_zmalloc("vhost bufftable",
+				sizeof(struct vhost_bufftable),
+				RTE_CACHE_LINE_SIZE);
+
+		if (vhost_txbuff[i * MAX_VHOST_DEVICE + vid] == NULL) {
+			RTE_LOG(INFO, VHOST_DATA,
+			  "(%d) couldn't allocate memory for vhost TX\n", vid);
+			return -1;
+		}
+	}
+
 	if (builtin_net_driver)
 		vs_vhost_net_setup(vdev);
 
@@ -1343,12 +1449,15 @@ new_device(int vid)
 	if (async_vhost_driver) {
 		struct rte_vhost_async_features f;
 		struct rte_vhost_async_channel_ops channel_ops;
+
 		if (strncmp(dma_type, "ioat", 4) == 0) {
 			channel_ops.transfer_data = ioat_transfer_data_cb;
 			channel_ops.check_completed_copies =
 				ioat_check_completed_copies_cb;
+
 			f.async_inorder = 1;
 			f.async_threshold = 256;
+
 			return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
 				f.intval, &channel_ops);
 		}
@@ -1392,8 +1501,8 @@ print_stats(__rte_unused void *arg)
 			tx         = vdev->stats.tx;
 			tx_dropped = tx_total - tx;
 
-			rx_total   = rte_atomic64_read(&vdev->stats.rx_total_atomic);
-			rx         = rte_atomic64_read(&vdev->stats.rx_atomic);
+			rx_total   = atomic_load(&vdev->stats.rx_total_atomic);
+			rx         = atomic_load(&vdev->stats.rx_atomic);
 			rx_dropped = rx_total - rx;
 
 			printf("Statistics for device %d\n"
@@ -1592,6 +1701,7 @@ main(int argc, char *argv[])
 	/* Register vhost user driver to handle vhost messages. */
 	for (i = 0; i < nb_sockets; i++) {
 		char *file = socket_files + i * PATH_MAX;
+
 		if (async_vhost_driver)
 			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
 
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 4317b6ae81..6aa798a3e2 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -8,6 +8,7 @@
 #include <sys/queue.h>
 
 #include <rte_ether.h>
+#include <stdatomic.h>
 
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
@@ -21,8 +22,8 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 struct device_statistics {
 	uint64_t	tx;
 	uint64_t	tx_total;
-	rte_atomic64_t	rx_atomic;
-	rte_atomic64_t	rx_total_atomic;
+	atomic_int_least64_t	rx_atomic;
+	atomic_int_least64_t	rx_total_atomic;
 };
 
 struct vhost_queue {
@@ -51,7 +52,7 @@ struct vhost_dev {
 	uint64_t features;
 	size_t hdr_len;
 	uint16_t nr_vrings;
-	uint16_t nr_async_pkts;
+	atomic_int_least16_t nr_async_pkts;
 	struct rte_vhost_memory *mem;
 	struct device_statistics stats;
 	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v7 1/2] examples/vhost: add ioat ring space count and check
  2021-01-05  2:15   ` [dpdk-dev] [PATCH v7 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
@ 2021-01-05  6:56     ` Hu, Jiayu
  0 siblings, 0 replies; 44+ messages in thread
From: Hu, Jiayu @ 2021-01-05  6:56 UTC (permalink / raw)
  To: Jiang, Cheng1, maxime.coquelin, Xia, Chenbo
  Cc: dev, Yang, YvonneX, Wang, Yinan

Reviewed-by: Jiayu Hu <jiayu.hu@intel.com>

> -----Original Message-----
> From: Jiang, Cheng1 <cheng1.jiang@intel.com>
> Sent: Tuesday, January 5, 2021 10:15 AM
> To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> <yvonnex.yang@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Jiang,
> Cheng1 <cheng1.jiang@intel.com>
> Subject: [PATCH v7 1/2] examples/vhost: add ioat ring space count and check
> 
> Add ioat ring space count and check, if ioat ring space is not enough
> for the next async vhost packet enqueue, then just return to prevent
> enqueue failure. Add rte_ioat_completed_ops() fail handler.
> 
> Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
> ---
>  examples/vhost/ioat.c | 24 +++++++++++++-----------
>  1 file changed, 13 insertions(+), 11 deletions(-)
> 
> diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
> index 71d8a1f1f5..dbad28d43e 100644
> --- a/examples/vhost/ioat.c
> +++ b/examples/vhost/ioat.c
> @@ -17,6 +17,7 @@ struct packet_tracker {
>  	unsigned short next_read;
>  	unsigned short next_write;
>  	unsigned short last_remain;
> +	unsigned short ioat_space;
>  };
> 
>  struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
> @@ -113,7 +114,7 @@ open_ioat(const char *value)
>  			goto out;
>  		}
>  		rte_rawdev_start(dev_id);
> -
> +		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
>  		dma_info->nr++;
>  		i++;
>  	}
> @@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
>  			src = descs[i_desc].src;
>  			dst = descs[i_desc].dst;
>  			i_seg = 0;
> +			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
> +				break;
>  			while (i_seg < src->nr_segs) {
> -				/*
> -				 * TODO: Assuming that the ring space of the
> -				 * IOAT device is large enough, so there is no
> -				 * error here, and the actual error handling
> -				 * will be added later.
> -				 */
>  				rte_ioat_enqueue_copy(dev_id,
>  					(uintptr_t)(src->iov[i_seg].iov_base)
>  						+ src->offset,
> @@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
>  				i_seg++;
>  			}
>  			write &= mask;
> -			cb_tracker[dev_id].size_track[write] = i_seg;
> +			cb_tracker[dev_id].size_track[write] = src->nr_segs;
> +			cb_tracker[dev_id].ioat_space -= src->nr_segs;
>  			write++;
>  		}
>  	} else {
> @@ -178,17 +176,21 @@ ioat_check_completed_copies_cb(int vid, uint16_t
> queue_id,
>  {
>  	if (!opaque_data) {
>  		uintptr_t dump[255];
> -		unsigned short n_seg;
> +		int n_seg;
>  		unsigned short read, write;
>  		unsigned short nb_packet = 0;
>  		unsigned short mask = MAX_ENQUEUED_SIZE - 1;
>  		unsigned short i;
> +
>  		int dev_id = dma_bind[vid].dmas[queue_id * 2
>  				+ VIRTIO_RXQ].dev_id;
>  		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
> -		n_seg += cb_tracker[dev_id].last_remain;
> -		if (!n_seg)
> +		if (n_seg <= 0)
>  			return 0;
> +
> +		cb_tracker[dev_id].ioat_space += n_seg;
> +		n_seg += cb_tracker[dev_id].last_remain;
> +
>  		read = cb_tracker[dev_id].next_read;
>  		write = cb_tracker[dev_id].next_write;
>  		for (i = 0; i < max_packets; i++) {
> --
> 2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v7 0/2] examples/vhost: sample code refactor
  2021-01-05  2:15 ` [dpdk-dev] [PATCH v7 0/2] examples/vhost: sample code refactor Cheng Jiang
  2021-01-05  2:15   ` [dpdk-dev] [PATCH v7 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
  2021-01-05  2:15   ` [dpdk-dev] [PATCH v7 2/2] examples/vhost: refactor vhost data path Cheng Jiang
@ 2021-01-06  7:47   ` Ling, WeiX
  2 siblings, 0 replies; 44+ messages in thread
From: Ling, WeiX @ 2021-01-06  7:47 UTC (permalink / raw)
  To: Jiang, Cheng1, maxime.coquelin, Xia, Chenbo
  Cc: dev, Hu, Jiayu, Yang, YvonneX, Wang, Yinan, Jiang, Cheng1

Tested-by: Wei Ling <weix.ling@intel.com>

Regards,
Ling Wei

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Cheng Jiang
> Sent: Tuesday, January 5, 2021 10:15 AM
> To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> <yvonnex.yang@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Jiang,
> Cheng1 <cheng1.jiang@intel.com>
> Subject: [dpdk-dev] [PATCH v7 0/2] examples/vhost: sample code refactor
> 
> Refactor the vhost sample code. Add ioat ring space count and check in ioat
> callback, optimize vhost data path for batch enqueue, replace
> rte_atomicNN_xxx to atomic_XXX and refactor vhost async data path.
> ---
> v7:
>  * fixed rte_ioat_completed_ops() fail handler issue
> 
> v6:
>  * adjusted the value of MAX_ENQUEUED_SIZE in ioat.h
> 
> v5:
>  * added vhost enqueue buffer free when destroy a vhost device
>  * added rte_ioat_completed_ops() fail handler
>  * changed the behavior of drain_vhost_table() function
>  * changed some variable names
>  * changed some variable definition
>  * added rte_zmalloc() fail handler
>  * added some comments
>  * fixed some typos
> 
> v4:
>  * improved code structure
>  * improved vhost enqueue buffer memory allocation
>  * cleaned some codes
> 
> v3:
>  * added some variable initiation
>  * cleaned some codes
> 
> v2:
>  * optimized patch structure
>  * optimized git log
>  * replaced rte_atomicNN_xxx to atomic_XXX
> 
> Cheng Jiang (2):
>   examples/vhost: add ioat ring space count and check
>   examples/vhost: refactor vhost data path
> 
>  examples/vhost/ioat.c |  24 ++---
>  examples/vhost/ioat.h |   2 +-
>  examples/vhost/main.c | 214 ++++++++++++++++++++++++++++++++-----
> -----
>  examples/vhost/main.h |   7 +-
>  4 files changed, 180 insertions(+), 67 deletions(-)
> 
> --
> 2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v8 0/2] examples/vhost: sample code refactor
  2020-12-18 11:33 [dpdk-dev] [PATCH v1 0/3] examples/vhost: sample code refactor Cheng Jiang
                   ` (8 preceding siblings ...)
  2021-01-05  2:15 ` [dpdk-dev] [PATCH v7 0/2] examples/vhost: sample code refactor Cheng Jiang
@ 2021-01-11  5:52 ` Cheng Jiang
  2021-01-11  5:52   ` [dpdk-dev] [PATCH v8 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
  2021-01-11  5:52   ` [dpdk-dev] [PATCH v8 2/2] examples/vhost: refactor vhost data path Cheng Jiang
  2021-01-12  4:38 ` [dpdk-dev] [PATCH v9 0/2] examples/vhost: sample code refactor Cheng Jiang
  10 siblings, 2 replies; 44+ messages in thread
From: Cheng Jiang @ 2021-01-11  5:52 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang, Cheng Jiang

Refactor the vhost sample code: add an ioat ring space count and check
in the ioat callback, optimize the vhost data path for batch enqueue,
replace rte_atomicNN_xxx with atomic_XXX, and refactor the vhost async
data path.
---
v8:
 * rebased codes

v7:
 * fixed rte_ioat_completed_ops() fail handler issue

v6:
 * adjusted the value of MAX_ENQUEUED_SIZE in ioat.h

v5:
 * added freeing of the vhost enqueue buffers when destroying a vhost device
 * added a failure handler for rte_ioat_completed_ops()
 * changed the behavior of the drain_vhost_table() function
 * changed some variable names
 * changed some variable definitions
 * added a failure handler for rte_zmalloc()
 * added some comments
 * fixed some typos

v4:
 * improved code structure
 * improved vhost enqueue buffer memory allocation
 * cleaned some codes

v3:
 * added some variable initialization
 * cleaned up some code

v2:
 * optimized patch structure
 * optimized git log
 * replaced rte_atomicNN_xxx to atomic_XXX

Cheng Jiang (2):
  examples/vhost: add ioat ring space count and check
  examples/vhost: refactor vhost data path

 examples/vhost/ioat.c |  24 +++--
 examples/vhost/ioat.h |   2 +-
 examples/vhost/main.c | 226 ++++++++++++++++++++++++++++++------------
 examples/vhost/main.h |   7 +-
 4 files changed, 181 insertions(+), 78 deletions(-)

--
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v8 1/2] examples/vhost: add ioat ring space count and check
  2021-01-11  5:52 ` [dpdk-dev] [PATCH v8 " Cheng Jiang
@ 2021-01-11  5:52   ` Cheng Jiang
  2021-01-11 14:15     ` Maxime Coquelin
  2021-01-11  5:52   ` [dpdk-dev] [PATCH v8 2/2] examples/vhost: refactor vhost data path Cheng Jiang
  1 sibling, 1 reply; 44+ messages in thread
From: Cheng Jiang @ 2021-01-11  5:52 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang, Cheng Jiang, Jiayu Hu

Add an ioat ring space count and check: if the ioat ring does not have
enough space for the next async vhost packet enqueue, return early to
prevent an enqueue failure. Also add a failure handler for
rte_ioat_completed_ops().

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
Reviewed-by: Jiayu Hu <jiayu.hu@intel.com>
---
 examples/vhost/ioat.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 71d8a1f1f..dbad28d43 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -17,6 +17,7 @@ struct packet_tracker {
 	unsigned short next_read;
 	unsigned short next_write;
 	unsigned short last_remain;
+	unsigned short ioat_space;
 };

 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
@@ -113,7 +114,7 @@ open_ioat(const char *value)
 			goto out;
 		}
 		rte_rawdev_start(dev_id);
-
+		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
 		dma_info->nr++;
 		i++;
 	}
@@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 			src = descs[i_desc].src;
 			dst = descs[i_desc].dst;
 			i_seg = 0;
+			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
+				break;
 			while (i_seg < src->nr_segs) {
-				/*
-				 * TODO: Assuming that the ring space of the
-				 * IOAT device is large enough, so there is no
-				 * error here, and the actual error handling
-				 * will be added later.
-				 */
 				rte_ioat_enqueue_copy(dev_id,
 					(uintptr_t)(src->iov[i_seg].iov_base)
 						+ src->offset,
@@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 				i_seg++;
 			}
 			write &= mask;
-			cb_tracker[dev_id].size_track[write] = i_seg;
+			cb_tracker[dev_id].size_track[write] = src->nr_segs;
+			cb_tracker[dev_id].ioat_space -= src->nr_segs;
 			write++;
 		}
 	} else {
@@ -178,17 +176,21 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 {
 	if (!opaque_data) {
 		uintptr_t dump[255];
-		unsigned short n_seg;
+		int n_seg;
 		unsigned short read, write;
 		unsigned short nb_packet = 0;
 		unsigned short mask = MAX_ENQUEUED_SIZE - 1;
 		unsigned short i;
+
 		int dev_id = dma_bind[vid].dmas[queue_id * 2
 				+ VIRTIO_RXQ].dev_id;
 		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
-		n_seg += cb_tracker[dev_id].last_remain;
-		if (!n_seg)
+		if (n_seg <= 0)
 			return 0;
+
+		cb_tracker[dev_id].ioat_space += n_seg;
+		n_seg += cb_tracker[dev_id].last_remain;
+
 		read = cb_tracker[dev_id].next_read;
 		write = cb_tracker[dev_id].next_write;
 		for (i = 0; i < max_packets; i++) {
--
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v8 2/2] examples/vhost: refactor vhost data path
  2021-01-11  5:52 ` [dpdk-dev] [PATCH v8 " Cheng Jiang
  2021-01-11  5:52   ` [dpdk-dev] [PATCH v8 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
@ 2021-01-11  5:52   ` Cheng Jiang
  2021-01-11 14:25     ` Maxime Coquelin
  1 sibling, 1 reply; 44+ messages in thread
From: Cheng Jiang @ 2021-01-11  5:52 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang, Cheng Jiang, Jiayu Hu

Change the vm2vm data path to batch enqueue for better performance.
Support the latest async vhost API, refactor the vhost async data path,
replace rte_atomicNN_xxx with atomic_XXX, and clean up some code.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
Reviewed-by: Jiayu Hu <jiayu.hu@intel.com>
---
 examples/vhost/ioat.h |   2 +-
 examples/vhost/main.c | 226 ++++++++++++++++++++++++++++++------------
 examples/vhost/main.h |   7 +-
 3 files changed, 168 insertions(+), 67 deletions(-)

diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index d6e1e2e07..0a1dbb811 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -11,7 +11,7 @@

 #define MAX_VHOST_DEVICE 1024
 #define IOAT_RING_SIZE 4096
-#define MAX_ENQUEUED_SIZE 256
+#define MAX_ENQUEUED_SIZE 512

 struct dma_info {
 	struct rte_pci_addr addr;
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 22309977c..45976c93c 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -179,9 +179,22 @@ struct mbuf_table {
 	struct rte_mbuf *m_table[MAX_PKT_BURST];
 };

+struct vhost_bufftable {
+	uint32_t len;
+	uint64_t pre_tsc;
+	struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
 /* TX queue for each data core. */
 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];

+/*
+ * Vhost TX buffer for each data core.
+ * Every data core maintains a TX buffer for every vhost device,
+ * which is used for batch pkts enqueue for higher performance.
+ */
+struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * MAX_VHOST_DEVICE];
+
 #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
 				 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN       4
@@ -804,43 +817,112 @@ unlink_vmdq(struct vhost_dev *vdev)
 	}
 }

+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+	while (n--)
+		rte_pktmbuf_free(pkts[n]);
+}
+
 static __rte_always_inline void
-virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
+complete_async_pkts(struct vhost_dev *vdev)
+{
+	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+	uint16_t complete_count;
+
+	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
+	if (complete_count) {
+		atomic_fetch_sub(&vdev->nr_async_pkts, complete_count);
+		free_pkts(p_cpl, complete_count);
+	}
+}
+
+static __rte_always_inline void
+sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
 {
 	uint16_t ret;
-	struct rte_mbuf *m_cpl[1], *comp_pkt;
-	uint32_t nr_comp = 0;

 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
-	} else if (async_vhost_driver) {
-		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
-						&m, 1, &comp_pkt, &nr_comp);
-		if (nr_comp == 1)
-			goto done;
-
-		if (likely(ret))
-			dst_vdev->nr_async_pkts++;
-
-		while (likely(dst_vdev->nr_async_pkts)) {
-			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
-					VIRTIO_RXQ, m_cpl, 1))
-				dst_vdev->nr_async_pkts--;
-		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
 	}

-done:
 	if (enable_stats) {
-		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
-		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
+		atomic_fetch_add(&dst_vdev->stats.rx_total_atomic, 1);
+		atomic_fetch_add(&dst_vdev->stats.rx_atomic, ret);
 		src_vdev->stats.tx_total++;
 		src_vdev->stats.tx += ret;
 	}
 }

+static __rte_always_inline void
+drain_vhost(struct vhost_dev *vdev)
+{
+	uint16_t ret;
+	uint64_t buff_idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
+	uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
+	struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
+
+	if (builtin_net_driver) {
+		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
+	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[nr_xmit];
+
+		complete_async_pkts(vdev);
+		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+					m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts, ret - cpu_cpl_nr);
+
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+
+		enqueue_fail = nr_xmit - ret;
+		if (enqueue_fail)
+			free_pkts(&m[ret], nr_xmit - ret);
+	} else {
+		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+						m, nr_xmit);
+	}
+
+	if (enable_stats) {
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, nr_xmit);
+		atomic_fetch_add(&vdev->stats.rx_atomic, ret);
+	}
+
+	if (!async_vhost_driver)
+		free_pkts(m, nr_xmit);
+}
+
+static __rte_always_inline void
+drain_vhost_table(void)
+{
+	uint16_t lcore_id = rte_lcore_id();
+	struct vhost_bufftable *vhost_txq;
+	struct vhost_dev *vdev;
+	uint64_t cur_tsc;
+
+	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+		vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE
+						+ vdev->vid];
+
+		cur_tsc = rte_rdtsc();
+		if (unlikely(cur_tsc - vhost_txq->pre_tsc
+				> MBUF_TABLE_DRAIN_TSC)) {
+			RTE_LOG_DP(DEBUG, VHOST_DATA,
+				"Vhost TX queue drained after timeout with burst size %u\n",
+				vhost_txq->len);
+			drain_vhost(vdev);
+			vhost_txq->len = 0;
+			vhost_txq->pre_tsc = cur_tsc;
+		}
+	}
+}
+
 /*
  * Check if the packet destination MAC address is for a local device. If so then put
  * the packet on that devices RX queue. If not then return.
@@ -850,7 +932,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
 	struct rte_ether_hdr *pkt_hdr;
 	struct vhost_dev *dst_vdev;
-
+	struct vhost_bufftable *vhost_txq;
+	uint16_t lcore_id = rte_lcore_id();
 	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);

 	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
@@ -873,7 +956,19 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 		return 0;
 	}

-	virtio_xmit(dst_vdev, vdev, m);
+	vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE + dst_vdev->vid];
+	vhost_txq->m_table[vhost_txq->len++] = m;
+
+	if (enable_stats) {
+		vdev->stats.tx_total++;
+		vdev->stats.tx++;
+	}
+
+	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
+		drain_vhost(dst_vdev);
+		vhost_txq->len = 0;
+		vhost_txq->pre_tsc = rte_rdtsc();
+	}
 	return 0;
 }

@@ -944,13 +1039,6 @@ static void virtio_tx_offload(struct rte_mbuf *m)
 	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
 }

-static inline void
-free_pkts(struct rte_mbuf **pkts, uint16_t n)
-{
-	while (n--)
-		rte_pktmbuf_free(pkts[n]);
-}
-
 static __rte_always_inline void
 do_drain_mbuf_table(struct mbuf_table *tx_q)
 {
@@ -983,16 +1071,14 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)

 		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
 			if (vdev2 != vdev)
-				virtio_xmit(vdev2, vdev, m);
+				sync_virtio_xmit(vdev2, vdev, m);
 		}
 		goto queue2nic;
 	}

 	/*check if destination is local VM*/
-	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
-		rte_pktmbuf_free(m);
+	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0))
 		return;
-	}

 	if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
 		if (unlikely(find_local_dest(vdev, m, &offset,
@@ -1077,32 +1163,15 @@ drain_mbuf_table(struct mbuf_table *tx_q)
 	}
 }

-static __rte_always_inline void
-complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
-{
-	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
-	uint16_t complete_count;
-
-	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
-						qid, p_cpl, MAX_PKT_BURST);
-	vdev->nr_async_pkts -= complete_count;
-	if (complete_count)
-		free_pkts(p_cpl, complete_count);
-}
-
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
 	uint16_t rx_count, enqueue_count;
-	struct rte_mbuf *pkts[MAX_PKT_BURST], *comp_pkts[MAX_PKT_BURST];
-	uint32_t nr_comp = 0;
+	struct rte_mbuf *pkts[MAX_PKT_BURST];

 	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
 				    pkts, MAX_PKT_BURST);

-	while (likely(vdev->nr_async_pkts))
-		complete_async_pkts(vdev, VIRTIO_RXQ);
-
 	if (!rx_count)
 		return;

@@ -1128,22 +1197,31 @@ drain_eth_rx(struct vhost_dev *vdev)
 		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
 						pkts, rx_count);
 	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
+
+		complete_async_pkts(vdev);
 		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
-					VIRTIO_RXQ, pkts, rx_count, comp_pkts,
-					&nr_comp);
-		if (nr_comp > 0) {
-			free_pkts(comp_pkts, nr_comp);
-			enqueue_count -= nr_comp;
-		}
-		vdev->nr_async_pkts += enqueue_count;
+					VIRTIO_RXQ, pkts, rx_count,
+					m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts,
+					enqueue_count - cpu_cpl_nr);
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+
+		enqueue_fail = rx_count - enqueue_count;
+		if (enqueue_fail)
+			free_pkts(&pkts[enqueue_count], enqueue_fail);
+
 	} else {
 		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
 						pkts, rx_count);
 	}

 	if (enable_stats) {
-		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
-		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, rx_count);
+		atomic_fetch_add(&vdev->stats.rx_atomic, enqueue_count);
 	}

 	if (!async_vhost_driver)
@@ -1212,7 +1290,7 @@ switch_worker(void *arg __rte_unused)

 	while(1) {
 		drain_mbuf_table(tx_q);
-
+		drain_vhost_table();
 		/*
 		 * Inform the configuration core that we have exited the
 		 * linked list and that no devices are in use if requested.
@@ -1253,6 +1331,7 @@ destroy_device(int vid)
 {
 	struct vhost_dev *vdev = NULL;
 	int lcore;
+	uint16_t i;

 	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
 		if (vdev->vid == vid)
@@ -1266,6 +1345,9 @@ destroy_device(int vid)
 		rte_pause();
 	}

+	for (i = 0; i < RTE_MAX_LCORE; i++)
+		rte_free(vhost_txbuff[i * MAX_VHOST_DEVICE + vid]);
+
 	if (builtin_net_driver)
 		vs_vhost_net_remove(vdev);

@@ -1308,6 +1390,7 @@ static int
 new_device(int vid)
 {
 	int lcore, core_add = 0;
+	uint16_t i;
 	uint32_t device_num_min = num_devices;
 	struct vhost_dev *vdev;
 	vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
@@ -1319,6 +1402,19 @@ new_device(int vid)
 	}
 	vdev->vid = vid;

+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		vhost_txbuff[i * MAX_VHOST_DEVICE + vid]
+			= rte_zmalloc("vhost bufftable",
+				sizeof(struct vhost_bufftable),
+				RTE_CACHE_LINE_SIZE);
+
+		if (vhost_txbuff[i * MAX_VHOST_DEVICE + vid] == NULL) {
+			RTE_LOG(INFO, VHOST_DATA,
+			  "(%d) couldn't allocate memory for vhost TX\n", vid);
+			return -1;
+		}
+	}
+
 	if (builtin_net_driver)
 		vs_vhost_net_setup(vdev);

@@ -1353,12 +1449,15 @@ new_device(int vid)
 	if (async_vhost_driver) {
 		struct rte_vhost_async_features f;
 		struct rte_vhost_async_channel_ops channel_ops;
+
 		if (strncmp(dma_type, "ioat", 4) == 0) {
 			channel_ops.transfer_data = ioat_transfer_data_cb;
 			channel_ops.check_completed_copies =
 				ioat_check_completed_copies_cb;
+
 			f.async_inorder = 1;
 			f.async_threshold = 256;
+
 			return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
 				f.intval, &channel_ops);
 		}
@@ -1402,8 +1501,8 @@ print_stats(__rte_unused void *arg)
 			tx         = vdev->stats.tx;
 			tx_dropped = tx_total - tx;

-			rx_total   = rte_atomic64_read(&vdev->stats.rx_total_atomic);
-			rx         = rte_atomic64_read(&vdev->stats.rx_atomic);
+			rx_total   = atomic_load(&vdev->stats.rx_total_atomic);
+			rx         = atomic_load(&vdev->stats.rx_atomic);
 			rx_dropped = rx_total - rx;

 			printf("Statistics for device %d\n"
@@ -1602,6 +1701,7 @@ main(int argc, char *argv[])
 	/* Register vhost user driver to handle vhost messages. */
 	for (i = 0; i < nb_sockets; i++) {
 		char *file = socket_files + i * PATH_MAX;
+
 		if (async_vhost_driver)
 			flags = flags | RTE_VHOST_USER_ASYNC_COPY;

diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 4317b6ae8..6aa798a3e 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -8,6 +8,7 @@
 #include <sys/queue.h>

 #include <rte_ether.h>
+#include <stdatomic.h>

 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
@@ -21,8 +22,8 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 struct device_statistics {
 	uint64_t	tx;
 	uint64_t	tx_total;
-	rte_atomic64_t	rx_atomic;
-	rte_atomic64_t	rx_total_atomic;
+	atomic_int_least64_t	rx_atomic;
+	atomic_int_least64_t	rx_total_atomic;
 };

 struct vhost_queue {
@@ -51,7 +52,7 @@ struct vhost_dev {
 	uint64_t features;
 	size_t hdr_len;
 	uint16_t nr_vrings;
-	uint16_t nr_async_pkts;
+	atomic_int_least16_t nr_async_pkts;
 	struct rte_vhost_memory *mem;
 	struct device_statistics stats;
 	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
--
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v8 1/2] examples/vhost: add ioat ring space count and check
  2021-01-11  5:52   ` [dpdk-dev] [PATCH v8 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
@ 2021-01-11 14:15     ` Maxime Coquelin
  0 siblings, 0 replies; 44+ messages in thread
From: Maxime Coquelin @ 2021-01-11 14:15 UTC (permalink / raw)
  To: Cheng Jiang, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang



On 1/11/21 6:52 AM, Cheng Jiang wrote:
> Add ioat ring space count and check, if ioat ring space is not enough
> for the next async vhost packet enqueue, then just return to prevent
> enqueue failure. Add rte_ioat_completed_ops() fail handler.
> 
> Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
> Reviewed-by: Jiayu Hu <jiayu.hu@intel.com>
> ---
>  examples/vhost/ioat.c | 24 +++++++++++++-----------
>  1 file changed, 13 insertions(+), 11 deletions(-)
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v8 2/2] examples/vhost: refactor vhost data path
  2021-01-11  5:52   ` [dpdk-dev] [PATCH v8 2/2] examples/vhost: refactor vhost data path Cheng Jiang
@ 2021-01-11 14:25     ` Maxime Coquelin
  2021-01-12  4:51       ` Jiang, Cheng1
  0 siblings, 1 reply; 44+ messages in thread
From: Maxime Coquelin @ 2021-01-11 14:25 UTC (permalink / raw)
  To: Cheng Jiang, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang



On 1/11/21 6:52 AM, Cheng Jiang wrote:
> Change the vm2vm data path to batch enqueue for better performance.
> Support latest async vhost API, refactor vhost async data path,
> replace rte_atomicNN_xxx to atomic_XXX and clean some codes.

Wouldn't it be better to use the GCC/Clang C11 atomic built-ins (i.e.
__atomic_XXX), since all other code is being migrated to them?

> Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
> Reviewed-by: Jiayu Hu <jiayu.hu@intel.com>
> ---
>  examples/vhost/ioat.h |   2 +-
>  examples/vhost/main.c | 226 ++++++++++++++++++++++++++++++------------
>  examples/vhost/main.h |   7 +-
>  3 files changed, 168 insertions(+), 67 deletions(-)
> 
> diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
> index d6e1e2e07..0a1dbb811 100644
> --- a/examples/vhost/ioat.h
> +++ b/examples/vhost/ioat.h
> @@ -11,7 +11,7 @@
> 
>  #define MAX_VHOST_DEVICE 1024
>  #define IOAT_RING_SIZE 4096
> -#define MAX_ENQUEUED_SIZE 256
> +#define MAX_ENQUEUED_SIZE 512
> 
>  struct dma_info {
>  	struct rte_pci_addr addr;
> diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> index 22309977c..45976c93c 100644
> --- a/examples/vhost/main.c
> +++ b/examples/vhost/main.c
> @@ -179,9 +179,22 @@ struct mbuf_table {
>  	struct rte_mbuf *m_table[MAX_PKT_BURST];
>  };
> 
> +struct vhost_bufftable {
> +	uint32_t len;
> +	uint64_t pre_tsc;
> +	struct rte_mbuf *m_table[MAX_PKT_BURST];
> +};
> +
>  /* TX queue for each data core. */
>  struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
> 
> +/*
> + * Vhost TX buffer for each data core.
> + * Every data core maintains a TX buffer for every vhost device,
> + * which is used for batch pkts enqueue for higher performance.
> + */
> +struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * MAX_VHOST_DEVICE];
> +
>  #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
>  				 / US_PER_S * BURST_TX_DRAIN_US)
>  #define VLAN_HLEN       4
> @@ -804,43 +817,112 @@ unlink_vmdq(struct vhost_dev *vdev)
>  	}
>  }
> 
> +static inline void
> +free_pkts(struct rte_mbuf **pkts, uint16_t n)
> +{
> +	while (n--)
> +		rte_pktmbuf_free(pkts[n]);
> +}
> +
>  static __rte_always_inline void
> -virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
> +complete_async_pkts(struct vhost_dev *vdev)
> +{
> +	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
> +	uint16_t complete_count;
> +
> +	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> +					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
> +	if (complete_count) {
> +		atomic_fetch_sub(&vdev->nr_async_pkts, complete_count);
> +		free_pkts(p_cpl, complete_count);
> +	}
> +}
> +
> +static __rte_always_inline void
> +sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
>  	    struct rte_mbuf *m)
>  {
>  	uint16_t ret;
> -	struct rte_mbuf *m_cpl[1], *comp_pkt;
> -	uint32_t nr_comp = 0;
> 
>  	if (builtin_net_driver) {
>  		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
> -	} else if (async_vhost_driver) {
> -		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
> -						&m, 1, &comp_pkt, &nr_comp);
> -		if (nr_comp == 1)
> -			goto done;
> -
> -		if (likely(ret))
> -			dst_vdev->nr_async_pkts++;
> -
> -		while (likely(dst_vdev->nr_async_pkts)) {
> -			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
> -					VIRTIO_RXQ, m_cpl, 1))
> -				dst_vdev->nr_async_pkts--;
> -		}
>  	} else {
>  		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
>  	}
> 
> -done:
>  	if (enable_stats) {
> -		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
> -		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
> +		atomic_fetch_add(&dst_vdev->stats.rx_total_atomic, 1);
> +		atomic_fetch_add(&dst_vdev->stats.rx_atomic, ret);
>  		src_vdev->stats.tx_total++;
>  		src_vdev->stats.tx += ret;
>  	}
>  }
> 
> +static __rte_always_inline void
> +drain_vhost(struct vhost_dev *vdev)
> +{
> +	uint16_t ret;
> +	uint64_t buff_idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
> +	uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
> +	struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
> +
> +	if (builtin_net_driver) {
> +		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
> +	} else if (async_vhost_driver) {
> +		uint32_t cpu_cpl_nr = 0;
> +		uint16_t enqueue_fail = 0;
> +		struct rte_mbuf *m_cpu_cpl[nr_xmit];
> +
> +		complete_async_pkts(vdev);
> +		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
> +					m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
> +		atomic_fetch_add(&vdev->nr_async_pkts, ret - cpu_cpl_nr);
> +
> +		if (cpu_cpl_nr)
> +			free_pkts(m_cpu_cpl, cpu_cpl_nr);
> +
> +		enqueue_fail = nr_xmit - ret;
> +		if (enqueue_fail)
> +			free_pkts(&m[ret], nr_xmit - ret);
> +	} else {
> +		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
> +						m, nr_xmit);
> +	}
> +
> +	if (enable_stats) {
> +		atomic_fetch_add(&vdev->stats.rx_total_atomic, nr_xmit);
> +		atomic_fetch_add(&vdev->stats.rx_atomic, ret);
> +	}
> +
> +	if (!async_vhost_driver)
> +		free_pkts(m, nr_xmit);
> +}
> +
> +static __rte_always_inline void
> +drain_vhost_table(void)
> +{
> +	uint16_t lcore_id = rte_lcore_id();
> +	struct vhost_bufftable *vhost_txq;
> +	struct vhost_dev *vdev;
> +	uint64_t cur_tsc;
> +
> +	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
> +		vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE
> +						+ vdev->vid];
> +
> +		cur_tsc = rte_rdtsc();
> +		if (unlikely(cur_tsc - vhost_txq->pre_tsc
> +				> MBUF_TABLE_DRAIN_TSC)) {
> +			RTE_LOG_DP(DEBUG, VHOST_DATA,
> +				"Vhost TX queue drained after timeout with burst size %u\n",
> +				vhost_txq->len);
> +			drain_vhost(vdev);
> +			vhost_txq->len = 0;
> +			vhost_txq->pre_tsc = cur_tsc;
> +		}
> +	}
> +}
> +
>  /*
>   * Check if the packet destination MAC address is for a local device. If so then put
>   * the packet on that devices RX queue. If not then return.
> @@ -850,7 +932,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
>  {
>  	struct rte_ether_hdr *pkt_hdr;
>  	struct vhost_dev *dst_vdev;
> -
> +	struct vhost_bufftable *vhost_txq;
> +	uint16_t lcore_id = rte_lcore_id();
>  	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> 
>  	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
> @@ -873,7 +956,19 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
>  		return 0;
>  	}
> 
> -	virtio_xmit(dst_vdev, vdev, m);
> +	vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE + dst_vdev->vid];
> +	vhost_txq->m_table[vhost_txq->len++] = m;
> +
> +	if (enable_stats) {
> +		vdev->stats.tx_total++;
> +		vdev->stats.tx++;
> +	}
> +
> +	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
> +		drain_vhost(dst_vdev);
> +		vhost_txq->len = 0;
> +		vhost_txq->pre_tsc = rte_rdtsc();
> +	}
>  	return 0;
>  }
> 
> @@ -944,13 +1039,6 @@ static void virtio_tx_offload(struct rte_mbuf *m)
>  	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
>  }
> 
> -static inline void
> -free_pkts(struct rte_mbuf **pkts, uint16_t n)
> -{
> -	while (n--)
> -		rte_pktmbuf_free(pkts[n]);
> -}
> -
>  static __rte_always_inline void
>  do_drain_mbuf_table(struct mbuf_table *tx_q)
>  {
> @@ -983,16 +1071,14 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
> 
>  		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
>  			if (vdev2 != vdev)
> -				virtio_xmit(vdev2, vdev, m);
> +				sync_virtio_xmit(vdev2, vdev, m);
>  		}
>  		goto queue2nic;
>  	}
> 
>  	/*check if destination is local VM*/
> -	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
> -		rte_pktmbuf_free(m);
> +	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0))
>  		return;
> -	}
> 
>  	if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
>  		if (unlikely(find_local_dest(vdev, m, &offset,
> @@ -1077,32 +1163,15 @@ drain_mbuf_table(struct mbuf_table *tx_q)
>  	}
>  }
> 
> -static __rte_always_inline void
> -complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
> -{
> -	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
> -	uint16_t complete_count;
> -
> -	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> -						qid, p_cpl, MAX_PKT_BURST);
> -	vdev->nr_async_pkts -= complete_count;
> -	if (complete_count)
> -		free_pkts(p_cpl, complete_count);
> -}
> -
>  static __rte_always_inline void
>  drain_eth_rx(struct vhost_dev *vdev)
>  {
>  	uint16_t rx_count, enqueue_count;
> -	struct rte_mbuf *pkts[MAX_PKT_BURST], *comp_pkts[MAX_PKT_BURST];
> -	uint32_t nr_comp = 0;
> +	struct rte_mbuf *pkts[MAX_PKT_BURST];
> 
>  	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
>  				    pkts, MAX_PKT_BURST);
> 
> -	while (likely(vdev->nr_async_pkts))
> -		complete_async_pkts(vdev, VIRTIO_RXQ);
> -
>  	if (!rx_count)
>  		return;
> 
> @@ -1128,22 +1197,31 @@ drain_eth_rx(struct vhost_dev *vdev)
>  		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
>  						pkts, rx_count);
>  	} else if (async_vhost_driver) {
> +		uint32_t cpu_cpl_nr = 0;
> +		uint16_t enqueue_fail = 0;
> +		struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
> +
> +		complete_async_pkts(vdev);
>  		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
> -					VIRTIO_RXQ, pkts, rx_count, comp_pkts,
> -					&nr_comp);
> -		if (nr_comp > 0) {
> -			free_pkts(comp_pkts, nr_comp);
> -			enqueue_count -= nr_comp;
> -		}
> -		vdev->nr_async_pkts += enqueue_count;
> +					VIRTIO_RXQ, pkts, rx_count,
> +					m_cpu_cpl, &cpu_cpl_nr);
> +		atomic_fetch_add(&vdev->nr_async_pkts,
> +					enqueue_count - cpu_cpl_nr);
> +		if (cpu_cpl_nr)
> +			free_pkts(m_cpu_cpl, cpu_cpl_nr);
> +
> +		enqueue_fail = rx_count - enqueue_count;
> +		if (enqueue_fail)
> +			free_pkts(&pkts[enqueue_count], enqueue_fail);
> +
>  	} else {
>  		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
>  						pkts, rx_count);
>  	}
> 
>  	if (enable_stats) {
> -		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
> -		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
> +		atomic_fetch_add(&vdev->stats.rx_total_atomic, rx_count);
> +		atomic_fetch_add(&vdev->stats.rx_atomic, enqueue_count);
>  	}
> 
>  	if (!async_vhost_driver)
> @@ -1212,7 +1290,7 @@ switch_worker(void *arg __rte_unused)
> 
>  	while(1) {
>  		drain_mbuf_table(tx_q);
> -
> +		drain_vhost_table();
>  		/*
>  		 * Inform the configuration core that we have exited the
>  		 * linked list and that no devices are in use if requested.
> @@ -1253,6 +1331,7 @@ destroy_device(int vid)
>  {
>  	struct vhost_dev *vdev = NULL;
>  	int lcore;
> +	uint16_t i;
> 
>  	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
>  		if (vdev->vid == vid)
> @@ -1266,6 +1345,9 @@ destroy_device(int vid)
>  		rte_pause();
>  	}
> 
> +	for (i = 0; i < RTE_MAX_LCORE; i++)
> +		rte_free(vhost_txbuff[i * MAX_VHOST_DEVICE + vid]);
> +
>  	if (builtin_net_driver)
>  		vs_vhost_net_remove(vdev);
> 
> @@ -1308,6 +1390,7 @@ static int
>  new_device(int vid)
>  {
>  	int lcore, core_add = 0;
> +	uint16_t i;
>  	uint32_t device_num_min = num_devices;
>  	struct vhost_dev *vdev;
>  	vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
> @@ -1319,6 +1402,19 @@ new_device(int vid)
>  	}
>  	vdev->vid = vid;
> 
> +	for (i = 0; i < RTE_MAX_LCORE; i++) {
> +		vhost_txbuff[i * MAX_VHOST_DEVICE + vid]
> +			= rte_zmalloc("vhost bufftable",
> +				sizeof(struct vhost_bufftable),
> +				RTE_CACHE_LINE_SIZE);
> +
> +		if (vhost_txbuff[i * MAX_VHOST_DEVICE + vid] == NULL) {
> +			RTE_LOG(INFO, VHOST_DATA,
> +			  "(%d) couldn't allocate memory for vhost TX\n", vid);
> +			return -1;
> +		}
> +	}
> +
>  	if (builtin_net_driver)
>  		vs_vhost_net_setup(vdev);
> 
> @@ -1353,12 +1449,15 @@ new_device(int vid)
>  	if (async_vhost_driver) {
>  		struct rte_vhost_async_features f;
>  		struct rte_vhost_async_channel_ops channel_ops;
> +
>  		if (strncmp(dma_type, "ioat", 4) == 0) {
>  			channel_ops.transfer_data = ioat_transfer_data_cb;
>  			channel_ops.check_completed_copies =
>  				ioat_check_completed_copies_cb;
> +
>  			f.async_inorder = 1;
>  			f.async_threshold = 256;
> +
>  			return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
>  				f.intval, &channel_ops);
>  		}
> @@ -1402,8 +1501,8 @@ print_stats(__rte_unused void *arg)
>  			tx         = vdev->stats.tx;
>  			tx_dropped = tx_total - tx;
> 
> -			rx_total   = rte_atomic64_read(&vdev->stats.rx_total_atomic);
> -			rx         = rte_atomic64_read(&vdev->stats.rx_atomic);
> +			rx_total   = atomic_load(&vdev->stats.rx_total_atomic);
> +			rx         = atomic_load(&vdev->stats.rx_atomic);
>  			rx_dropped = rx_total - rx;
> 
>  			printf("Statistics for device %d\n"
> @@ -1602,6 +1701,7 @@ main(int argc, char *argv[])
>  	/* Register vhost user driver to handle vhost messages. */
>  	for (i = 0; i < nb_sockets; i++) {
>  		char *file = socket_files + i * PATH_MAX;
> +
>  		if (async_vhost_driver)
>  			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
> 
> diff --git a/examples/vhost/main.h b/examples/vhost/main.h
> index 4317b6ae8..6aa798a3e 100644
> --- a/examples/vhost/main.h
> +++ b/examples/vhost/main.h
> @@ -8,6 +8,7 @@
>  #include <sys/queue.h>
> 
>  #include <rte_ether.h>
> +#include <stdatomic.h>
> 
>  /* Macros for printing using RTE_LOG */
>  #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
> @@ -21,8 +22,8 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
>  struct device_statistics {
>  	uint64_t	tx;
>  	uint64_t	tx_total;
> -	rte_atomic64_t	rx_atomic;
> -	rte_atomic64_t	rx_total_atomic;
> +	atomic_int_least64_t	rx_atomic;
> +	atomic_int_least64_t	rx_total_atomic;
>  };
> 
>  struct vhost_queue {
> @@ -51,7 +52,7 @@ struct vhost_dev {
>  	uint64_t features;
>  	size_t hdr_len;
>  	uint16_t nr_vrings;
> -	uint16_t nr_async_pkts;
> +	atomic_int_least16_t nr_async_pkts;
>  	struct rte_vhost_memory *mem;
>  	struct device_statistics stats;
>  	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
> --
> 2.29.2
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v9 0/2] examples/vhost: sample code refactor
  2020-12-18 11:33 [dpdk-dev] [PATCH v1 0/3] examples/vhost: sample code refactor Cheng Jiang
                   ` (9 preceding siblings ...)
  2021-01-11  5:52 ` [dpdk-dev] [PATCH v8 " Cheng Jiang
@ 2021-01-12  4:38 ` Cheng Jiang
  2021-01-12  4:38   ` [dpdk-dev] [PATCH v9 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
                     ` (2 more replies)
  10 siblings, 3 replies; 44+ messages in thread
From: Cheng Jiang @ 2021-01-12  4:38 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang, Cheng Jiang

Refactor the vhost sample code: add an ioat ring space count and check
in the ioat callback, optimize the vhost data path for batch enqueue,
replace rte_atomicNN_xxx with __atomic_XXX, and refactor the vhost async
data path.
---
v9:
 * migrated atomic_XXX to __atomic_XXX and updated git log

v8:
 * rebased the code

v7:
 * fixed rte_ioat_completed_ops() fail handler issue

v6:
 * adjusted the value of MAX_ENQUEUED_SIZE in ioat.h

v5:
 * added vhost enqueue buffer free when destroy a vhost device
 * added rte_ioat_completed_ops() fail handler
 * changed the behavior of drain_vhost_table() function
 * changed some variable names
 * changed some variable definition
 * added rte_zmalloc() fail handler
 * added some comments
 * fixed some typos

v4:
 * improved code structure
 * improved vhost enqueue buffer memory allocation
 * cleaned some codes

v3:
 * added some variable initialization
 * cleaned some codes

v2:
 * optimized patch structure
 * optimized git log
 * replaced rte_atomicNN_xxx to atomic_XXX

Cheng Jiang (2):
  examples/vhost: add ioat ring space count and check
  examples/vhost: refactor vhost data path

 examples/vhost/ioat.c |  24 +++--
 examples/vhost/ioat.h |   2 +-
 examples/vhost/main.c | 238 ++++++++++++++++++++++++++++++------------
 examples/vhost/main.h |   6 +-
 4 files changed, 191 insertions(+), 79 deletions(-)

--
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v9 1/2] examples/vhost: add ioat ring space count and check
  2021-01-12  4:38 ` [dpdk-dev] [PATCH v9 0/2] examples/vhost: sample code refactor Cheng Jiang
@ 2021-01-12  4:38   ` Cheng Jiang
  2021-01-21 12:34     ` Maxime Coquelin
  2021-01-12  4:38   ` [dpdk-dev] [PATCH v9 2/2] examples/vhost: refactor vhost data path Cheng Jiang
  2021-01-27 12:00   ` [dpdk-dev] [PATCH v9 0/2] examples/vhost: sample code refactor Maxime Coquelin
  2 siblings, 1 reply; 44+ messages in thread
From: Cheng Jiang @ 2021-01-12  4:38 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang, Cheng Jiang, Jiayu Hu

Add an ioat ring space count and check it before enqueueing: if the
ioat ring does not have enough space for the next async vhost packet,
stop enqueueing and return, to prevent an enqueue failure. Also add a
failure handler for rte_ioat_completed_ops().

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
Reviewed-by: Jiayu Hu <jiayu.hu@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 examples/vhost/ioat.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 71d8a1f1f..dbad28d43 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -17,6 +17,7 @@ struct packet_tracker {
 	unsigned short next_read;
 	unsigned short next_write;
 	unsigned short last_remain;
+	unsigned short ioat_space;
 };

 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
@@ -113,7 +114,7 @@ open_ioat(const char *value)
 			goto out;
 		}
 		rte_rawdev_start(dev_id);
-
+		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
 		dma_info->nr++;
 		i++;
 	}
@@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 			src = descs[i_desc].src;
 			dst = descs[i_desc].dst;
 			i_seg = 0;
+			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
+				break;
 			while (i_seg < src->nr_segs) {
-				/*
-				 * TODO: Assuming that the ring space of the
-				 * IOAT device is large enough, so there is no
-				 * error here, and the actual error handling
-				 * will be added later.
-				 */
 				rte_ioat_enqueue_copy(dev_id,
 					(uintptr_t)(src->iov[i_seg].iov_base)
 						+ src->offset,
@@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 				i_seg++;
 			}
 			write &= mask;
-			cb_tracker[dev_id].size_track[write] = i_seg;
+			cb_tracker[dev_id].size_track[write] = src->nr_segs;
+			cb_tracker[dev_id].ioat_space -= src->nr_segs;
 			write++;
 		}
 	} else {
@@ -178,17 +176,21 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 {
 	if (!opaque_data) {
 		uintptr_t dump[255];
-		unsigned short n_seg;
+		int n_seg;
 		unsigned short read, write;
 		unsigned short nb_packet = 0;
 		unsigned short mask = MAX_ENQUEUED_SIZE - 1;
 		unsigned short i;
+
 		int dev_id = dma_bind[vid].dmas[queue_id * 2
 				+ VIRTIO_RXQ].dev_id;
 		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
-		n_seg += cb_tracker[dev_id].last_remain;
-		if (!n_seg)
+		if (n_seg <= 0)
 			return 0;
+
+		cb_tracker[dev_id].ioat_space += n_seg;
+		n_seg += cb_tracker[dev_id].last_remain;
+
 		read = cb_tracker[dev_id].next_read;
 		write = cb_tracker[dev_id].next_write;
 		for (i = 0; i < max_packets; i++) {
--
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v9 2/2] examples/vhost: refactor vhost data path
  2021-01-12  4:38 ` [dpdk-dev] [PATCH v9 0/2] examples/vhost: sample code refactor Cheng Jiang
  2021-01-12  4:38   ` [dpdk-dev] [PATCH v9 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
@ 2021-01-12  4:38   ` Cheng Jiang
  2021-01-21 12:39     ` Maxime Coquelin
  2021-01-27 12:00   ` [dpdk-dev] [PATCH v9 0/2] examples/vhost: sample code refactor Maxime Coquelin
  2 siblings, 1 reply; 44+ messages in thread
From: Cheng Jiang @ 2021-01-12  4:38 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia
  Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang, Cheng Jiang, Jiayu Hu

Change the vm2vm data path to batch enqueue for better performance.
Support the latest async vhost API, refactor the vhost async data path,
replace rte_atomicNN_xxx with __atomic_XXX, and clean up some code.

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
Reviewed-by: Jiayu Hu <jiayu.hu@intel.com>
---
 examples/vhost/ioat.h |   2 +-
 examples/vhost/main.c | 238 ++++++++++++++++++++++++++++++------------
 examples/vhost/main.h |   6 +-
 3 files changed, 178 insertions(+), 68 deletions(-)

diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index d6e1e2e07..0a1dbb811 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -11,7 +11,7 @@
 
 #define MAX_VHOST_DEVICE 1024
 #define IOAT_RING_SIZE 4096
-#define MAX_ENQUEUED_SIZE 256
+#define MAX_ENQUEUED_SIZE 512
 
 struct dma_info {
 	struct rte_pci_addr addr;
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 22309977c..233a2dc6e 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -14,7 +14,6 @@
 #include <sys/param.h>
 #include <unistd.h>
 
-#include <rte_atomic.h>
 #include <rte_cycles.h>
 #include <rte_ethdev.h>
 #include <rte_log.h>
@@ -179,9 +178,22 @@ struct mbuf_table {
 	struct rte_mbuf *m_table[MAX_PKT_BURST];
 };
 
+struct vhost_bufftable {
+	uint32_t len;
+	uint64_t pre_tsc;
+	struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
 /* TX queue for each data core. */
 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
 
+/*
+ * Vhost TX buffer for each data core.
+ * Every data core maintains a TX buffer for every vhost device,
+ * which is used for batch pkts enqueue for higher performance.
+ */
+struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * MAX_VHOST_DEVICE];
+
 #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
 				 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN       4
@@ -804,43 +816,118 @@ unlink_vmdq(struct vhost_dev *vdev)
 	}
 }
 
+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+	while (n--)
+		rte_pktmbuf_free(pkts[n]);
+}
+
+static __rte_always_inline void
+complete_async_pkts(struct vhost_dev *vdev)
+{
+	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+	uint16_t complete_count;
+
+	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
+	if (complete_count) {
+		__atomic_sub_fetch(&vdev->nr_async_pkts, complete_count,
+			__ATOMIC_SEQ_CST);
+		free_pkts(p_cpl, complete_count);
+	}
+}
+
 static __rte_always_inline void
-virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
+sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
 {
 	uint16_t ret;
-	struct rte_mbuf *m_cpl[1], *comp_pkt;
-	uint32_t nr_comp = 0;
 
 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
-	} else if (async_vhost_driver) {
-		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
-						&m, 1, &comp_pkt, &nr_comp);
-		if (nr_comp == 1)
-			goto done;
-
-		if (likely(ret))
-			dst_vdev->nr_async_pkts++;
-
-		while (likely(dst_vdev->nr_async_pkts)) {
-			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
-					VIRTIO_RXQ, m_cpl, 1))
-				dst_vdev->nr_async_pkts--;
-		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
 	}
 
-done:
 	if (enable_stats) {
-		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
-		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
+		__atomic_add_fetch(&dst_vdev->stats.rx_total_atomic, 1,
+				__ATOMIC_SEQ_CST);
+		__atomic_add_fetch(&dst_vdev->stats.rx_atomic, ret,
+				__ATOMIC_SEQ_CST);
 		src_vdev->stats.tx_total++;
 		src_vdev->stats.tx += ret;
 	}
 }
 
+static __rte_always_inline void
+drain_vhost(struct vhost_dev *vdev)
+{
+	uint16_t ret;
+	uint64_t buff_idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
+	uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
+	struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
+
+	if (builtin_net_driver) {
+		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
+	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[nr_xmit];
+
+		complete_async_pkts(vdev);
+		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+					m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+		__atomic_add_fetch(&vdev->nr_async_pkts, ret - cpu_cpl_nr,
+				__ATOMIC_SEQ_CST);
+
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+
+		enqueue_fail = nr_xmit - ret;
+		if (enqueue_fail)
+			free_pkts(&m[ret], nr_xmit - ret);
+	} else {
+		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+						m, nr_xmit);
+	}
+
+	if (enable_stats) {
+		__atomic_add_fetch(&vdev->stats.rx_total_atomic, nr_xmit,
+				__ATOMIC_SEQ_CST);
+		__atomic_add_fetch(&vdev->stats.rx_atomic, ret,
+				__ATOMIC_SEQ_CST);
+	}
+
+	if (!async_vhost_driver)
+		free_pkts(m, nr_xmit);
+}
+
+static __rte_always_inline void
+drain_vhost_table(void)
+{
+	uint16_t lcore_id = rte_lcore_id();
+	struct vhost_bufftable *vhost_txq;
+	struct vhost_dev *vdev;
+	uint64_t cur_tsc;
+
+	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+		vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE
+						+ vdev->vid];
+
+		cur_tsc = rte_rdtsc();
+		if (unlikely(cur_tsc - vhost_txq->pre_tsc
+				> MBUF_TABLE_DRAIN_TSC)) {
+			RTE_LOG_DP(DEBUG, VHOST_DATA,
+				"Vhost TX queue drained after timeout with burst size %u\n",
+				vhost_txq->len);
+			drain_vhost(vdev);
+			vhost_txq->len = 0;
+			vhost_txq->pre_tsc = cur_tsc;
+		}
+	}
+}
+
 /*
  * Check if the packet destination MAC address is for a local device. If so then put
  * the packet on that devices RX queue. If not then return.
@@ -850,7 +937,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
 	struct rte_ether_hdr *pkt_hdr;
 	struct vhost_dev *dst_vdev;
-
+	struct vhost_bufftable *vhost_txq;
+	uint16_t lcore_id = rte_lcore_id();
 	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
 	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
@@ -873,7 +961,19 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 		return 0;
 	}
 
-	virtio_xmit(dst_vdev, vdev, m);
+	vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE + dst_vdev->vid];
+	vhost_txq->m_table[vhost_txq->len++] = m;
+
+	if (enable_stats) {
+		vdev->stats.tx_total++;
+		vdev->stats.tx++;
+	}
+
+	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
+		drain_vhost(dst_vdev);
+		vhost_txq->len = 0;
+		vhost_txq->pre_tsc = rte_rdtsc();
+	}
 	return 0;
 }
 
@@ -944,13 +1044,6 @@ static void virtio_tx_offload(struct rte_mbuf *m)
 	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
 }
 
-static inline void
-free_pkts(struct rte_mbuf **pkts, uint16_t n)
-{
-	while (n--)
-		rte_pktmbuf_free(pkts[n]);
-}
-
 static __rte_always_inline void
 do_drain_mbuf_table(struct mbuf_table *tx_q)
 {
@@ -983,16 +1076,14 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 
 		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
 			if (vdev2 != vdev)
-				virtio_xmit(vdev2, vdev, m);
+				sync_virtio_xmit(vdev2, vdev, m);
 		}
 		goto queue2nic;
 	}
 
 	/*check if destination is local VM*/
-	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
-		rte_pktmbuf_free(m);
+	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0))
 		return;
-	}
 
 	if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
 		if (unlikely(find_local_dest(vdev, m, &offset,
@@ -1077,32 +1168,15 @@ drain_mbuf_table(struct mbuf_table *tx_q)
 	}
 }
 
-static __rte_always_inline void
-complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
-{
-	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
-	uint16_t complete_count;
-
-	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
-						qid, p_cpl, MAX_PKT_BURST);
-	vdev->nr_async_pkts -= complete_count;
-	if (complete_count)
-		free_pkts(p_cpl, complete_count);
-}
-
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
 	uint16_t rx_count, enqueue_count;
-	struct rte_mbuf *pkts[MAX_PKT_BURST], *comp_pkts[MAX_PKT_BURST];
-	uint32_t nr_comp = 0;
+	struct rte_mbuf *pkts[MAX_PKT_BURST];
 
 	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
 				    pkts, MAX_PKT_BURST);
 
-	while (likely(vdev->nr_async_pkts))
-		complete_async_pkts(vdev, VIRTIO_RXQ);
-
 	if (!rx_count)
 		return;
 
@@ -1128,22 +1202,34 @@ drain_eth_rx(struct vhost_dev *vdev)
 		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
 						pkts, rx_count);
 	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
+
+		complete_async_pkts(vdev);
 		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
-					VIRTIO_RXQ, pkts, rx_count, comp_pkts,
-					&nr_comp);
-		if (nr_comp > 0) {
-			free_pkts(comp_pkts, nr_comp);
-			enqueue_count -= nr_comp;
-		}
-		vdev->nr_async_pkts += enqueue_count;
+					VIRTIO_RXQ, pkts, rx_count,
+					m_cpu_cpl, &cpu_cpl_nr);
+		__atomic_add_fetch(&vdev->nr_async_pkts,
+					enqueue_count - cpu_cpl_nr,
+					__ATOMIC_SEQ_CST);
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+
+		enqueue_fail = rx_count - enqueue_count;
+		if (enqueue_fail)
+			free_pkts(&pkts[enqueue_count], enqueue_fail);
+
 	} else {
 		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
 						pkts, rx_count);
 	}
 
 	if (enable_stats) {
-		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
-		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
+		__atomic_add_fetch(&vdev->stats.rx_total_atomic, rx_count,
+				__ATOMIC_SEQ_CST);
+		__atomic_add_fetch(&vdev->stats.rx_atomic, enqueue_count,
+				__ATOMIC_SEQ_CST);
 	}
 
 	if (!async_vhost_driver)
@@ -1212,7 +1298,7 @@ switch_worker(void *arg __rte_unused)
 
 	while(1) {
 		drain_mbuf_table(tx_q);
-
+		drain_vhost_table();
 		/*
 		 * Inform the configuration core that we have exited the
 		 * linked list and that no devices are in use if requested.
@@ -1253,6 +1339,7 @@ destroy_device(int vid)
 {
 	struct vhost_dev *vdev = NULL;
 	int lcore;
+	uint16_t i;
 
 	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
 		if (vdev->vid == vid)
@@ -1266,6 +1353,9 @@ destroy_device(int vid)
 		rte_pause();
 	}
 
+	for (i = 0; i < RTE_MAX_LCORE; i++)
+		rte_free(vhost_txbuff[i * MAX_VHOST_DEVICE + vid]);
+
 	if (builtin_net_driver)
 		vs_vhost_net_remove(vdev);
 
@@ -1308,6 +1398,7 @@ static int
 new_device(int vid)
 {
 	int lcore, core_add = 0;
+	uint16_t i;
 	uint32_t device_num_min = num_devices;
 	struct vhost_dev *vdev;
 	vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
@@ -1319,6 +1410,19 @@ new_device(int vid)
 	}
 	vdev->vid = vid;
 
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		vhost_txbuff[i * MAX_VHOST_DEVICE + vid]
+			= rte_zmalloc("vhost bufftable",
+				sizeof(struct vhost_bufftable),
+				RTE_CACHE_LINE_SIZE);
+
+		if (vhost_txbuff[i * MAX_VHOST_DEVICE + vid] == NULL) {
+			RTE_LOG(INFO, VHOST_DATA,
+			  "(%d) couldn't allocate memory for vhost TX\n", vid);
+			return -1;
+		}
+	}
+
 	if (builtin_net_driver)
 		vs_vhost_net_setup(vdev);
 
@@ -1353,12 +1457,15 @@ new_device(int vid)
 	if (async_vhost_driver) {
 		struct rte_vhost_async_features f;
 		struct rte_vhost_async_channel_ops channel_ops;
+
 		if (strncmp(dma_type, "ioat", 4) == 0) {
 			channel_ops.transfer_data = ioat_transfer_data_cb;
 			channel_ops.check_completed_copies =
 				ioat_check_completed_copies_cb;
+
 			f.async_inorder = 1;
 			f.async_threshold = 256;
+
 			return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
 				f.intval, &channel_ops);
 		}
@@ -1402,8 +1509,10 @@ print_stats(__rte_unused void *arg)
 			tx         = vdev->stats.tx;
 			tx_dropped = tx_total - tx;
 
-			rx_total   = rte_atomic64_read(&vdev->stats.rx_total_atomic);
-			rx         = rte_atomic64_read(&vdev->stats.rx_atomic);
+			rx_total = __atomic_load_n(&vdev->stats.rx_total_atomic,
+				__ATOMIC_SEQ_CST);
+			rx         = __atomic_load_n(&vdev->stats.rx_atomic,
+				__ATOMIC_SEQ_CST);
 			rx_dropped = rx_total - rx;
 
 			printf("Statistics for device %d\n"
@@ -1602,6 +1711,7 @@ main(int argc, char *argv[])
 	/* Register vhost user driver to handle vhost messages. */
 	for (i = 0; i < nb_sockets; i++) {
 		char *file = socket_files + i * PATH_MAX;
+
 		if (async_vhost_driver)
 			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
 
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 4317b6ae8..2d6c05fd7 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -21,8 +21,8 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 struct device_statistics {
 	uint64_t	tx;
 	uint64_t	tx_total;
-	rte_atomic64_t	rx_atomic;
-	rte_atomic64_t	rx_total_atomic;
+	uint64_t	rx_atomic;
+	uint64_t	rx_total_atomic;
 };
 
 struct vhost_queue {
@@ -51,7 +51,7 @@ struct vhost_dev {
 	uint64_t features;
 	size_t hdr_len;
 	uint16_t nr_vrings;
-	uint16_t nr_async_pkts;
+	uint64_t nr_async_pkts;
 	struct rte_vhost_memory *mem;
 	struct device_statistics stats;
 	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v8 2/2] examples/vhost: refactor vhost data path
  2021-01-11 14:25     ` Maxime Coquelin
@ 2021-01-12  4:51       ` Jiang, Cheng1
  0 siblings, 0 replies; 44+ messages in thread
From: Jiang, Cheng1 @ 2021-01-12  4:51 UTC (permalink / raw)
  To: Maxime Coquelin, Xia, Chenbo; +Cc: dev, Hu, Jiayu, Yang, YvonneX, Wang, Yinan

Hi,

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Monday, January 11, 2021 10:25 PM
> To: Jiang, Cheng1 <cheng1.jiang@intel.com>; Xia, Chenbo
> <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> <yvonnex.yang@intel.com>; Wang, Yinan <yinan.wang@intel.com>
> Subject: Re: [PATCH v8 2/2] examples/vhost: refactor vhost data path
> 
> 
> 
> On 1/11/21 6:52 AM, Cheng Jiang wrote:
> > Change the vm2vm data path to batch enqueue for better performance.
> > Support latest async vhost API, refactor vhost async data path,
> > replace rte_atomicNN_xxx to atomic_XXX and clean some codes.
> 
> Wouldn't it be better to use GCC/Clang C11 atomic built-ins like all other code
> is being migrated to it? (i.e. __atomic_XXX)

Sure, it will be fixed in the next version.

Thanks,
Cheng

> 
> > Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
> > Reviewed-by: Jiayu Hu <jiayu.hu@intel.com>
> > ---
> >  examples/vhost/ioat.h |   2 +-
> >  examples/vhost/main.c | 226 ++++++++++++++++++++++++++++++------
> ------
> >  examples/vhost/main.h |   7 +-
> >  3 files changed, 168 insertions(+), 67 deletions(-)
> >
> > diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h index
> > d6e1e2e07..0a1dbb811 100644
> > --- a/examples/vhost/ioat.h
> > +++ b/examples/vhost/ioat.h
> > @@ -11,7 +11,7 @@
> >
> >  #define MAX_VHOST_DEVICE 1024
> >  #define IOAT_RING_SIZE 4096
> > -#define MAX_ENQUEUED_SIZE 256
> > +#define MAX_ENQUEUED_SIZE 512
> >
> >  struct dma_info {
> >  	struct rte_pci_addr addr;
> > diff --git a/examples/vhost/main.c b/examples/vhost/main.c index
> > 22309977c..45976c93c 100644
> > --- a/examples/vhost/main.c
> > +++ b/examples/vhost/main.c
> > @@ -179,9 +179,22 @@ struct mbuf_table {
> >  	struct rte_mbuf *m_table[MAX_PKT_BURST];  };
> >
> > +struct vhost_bufftable {
> > +	uint32_t len;
> > +	uint64_t pre_tsc;
> > +	struct rte_mbuf *m_table[MAX_PKT_BURST]; };
> > +
> >  /* TX queue for each data core. */
> >  struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
> >
> > +/*
> > + * Vhost TX buffer for each data core.
> > + * Every data core maintains a TX buffer for every vhost device,
> > + * which is used for batch pkts enqueue for higher performance.
> > + */
> > +struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE *
> > +MAX_VHOST_DEVICE];
> > +
> >  #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
> >  				 / US_PER_S * BURST_TX_DRAIN_US)
> >  #define VLAN_HLEN       4
> > @@ -804,43 +817,112 @@ unlink_vmdq(struct vhost_dev *vdev)
> >  	}
> >  }
> >
> > +static inline void
> > +free_pkts(struct rte_mbuf **pkts, uint16_t n) {
> > +	while (n--)
> > +		rte_pktmbuf_free(pkts[n]);
> > +}
> > +
> >  static __rte_always_inline void
> > -virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
> > +complete_async_pkts(struct vhost_dev *vdev) {
> > +	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
> > +	uint16_t complete_count;
> > +
> > +	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> > +					VIRTIO_RXQ, p_cpl,
> MAX_PKT_BURST);
> > +	if (complete_count) {
> > +		atomic_fetch_sub(&vdev->nr_async_pkts, complete_count);
> > +		free_pkts(p_cpl, complete_count);
> > +	}
> > +}
> > +
> > +static __rte_always_inline void
> > +sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev
> > +*src_vdev,
> >  	    struct rte_mbuf *m)
> >  {
> >  	uint16_t ret;
> > -	struct rte_mbuf *m_cpl[1], *comp_pkt;
> > -	uint32_t nr_comp = 0;
> >
> >  	if (builtin_net_driver) {
> >  		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
> > -	} else if (async_vhost_driver) {
> > -		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid,
> VIRTIO_RXQ,
> > -						&m, 1, &comp_pkt,
> &nr_comp);
> > -		if (nr_comp == 1)
> > -			goto done;
> > -
> > -		if (likely(ret))
> > -			dst_vdev->nr_async_pkts++;
> > -
> > -		while (likely(dst_vdev->nr_async_pkts)) {
> > -			if (rte_vhost_poll_enqueue_completed(dst_vdev-
> >vid,
> > -					VIRTIO_RXQ, m_cpl, 1))
> > -				dst_vdev->nr_async_pkts--;
> > -		}
> >  	} else {
> >  		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
> &m, 1);
> >  	}
> >
> > -done:
> >  	if (enable_stats) {
> > -		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
> > -		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
> > +		atomic_fetch_add(&dst_vdev->stats.rx_total_atomic, 1);
> > +		atomic_fetch_add(&dst_vdev->stats.rx_atomic, ret);
> >  		src_vdev->stats.tx_total++;
> >  		src_vdev->stats.tx += ret;
> >  	}
> >  }
> >
> > +static __rte_always_inline void
> > +drain_vhost(struct vhost_dev *vdev)
> > +{
> > +	uint16_t ret;
> > +	uint64_t buff_idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev-
> >vid;
> > +	uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
> > +	struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
> > +
> > +	if (builtin_net_driver) {
> > +		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
> > +	} else if (async_vhost_driver) {
> > +		uint32_t cpu_cpl_nr = 0;
> > +		uint16_t enqueue_fail = 0;
> > +		struct rte_mbuf *m_cpu_cpl[nr_xmit];
> > +
> > +		complete_async_pkts(vdev);
> > +		ret = rte_vhost_submit_enqueue_burst(vdev->vid,
> VIRTIO_RXQ,
> > +					m, nr_xmit, m_cpu_cpl,
> &cpu_cpl_nr);
> > +		atomic_fetch_add(&vdev->nr_async_pkts, ret - cpu_cpl_nr);
> > +
> > +		if (cpu_cpl_nr)
> > +			free_pkts(m_cpu_cpl, cpu_cpl_nr);
> > +
> > +		enqueue_fail = nr_xmit - ret;
> > +		if (enqueue_fail)
> > +			free_pkts(&m[ret], nr_xmit - ret);
> > +	} else {
> > +		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
> > +						m, nr_xmit);
> > +	}
> > +
> > +	if (enable_stats) {
> > +		atomic_fetch_add(&vdev->stats.rx_total_atomic, nr_xmit);
> > +		atomic_fetch_add(&vdev->stats.rx_atomic, ret);
> > +	}
> > +
> > +	if (!async_vhost_driver)
> > +		free_pkts(m, nr_xmit);
> > +}
> > +
> > +static __rte_always_inline void
> > +drain_vhost_table(void)
> > +{
> > +	uint16_t lcore_id = rte_lcore_id();
> > +	struct vhost_bufftable *vhost_txq;
> > +	struct vhost_dev *vdev;
> > +	uint64_t cur_tsc;
> > +
> > +	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
> > +		vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE
> > +						+ vdev->vid];
> > +
> > +		cur_tsc = rte_rdtsc();
> > +		if (unlikely(cur_tsc - vhost_txq->pre_tsc
> > +				> MBUF_TABLE_DRAIN_TSC)) {
> > +			RTE_LOG_DP(DEBUG, VHOST_DATA,
> > +				"Vhost TX queue drained after timeout with
> burst size %u\n",
> > +				vhost_txq->len);
> > +			drain_vhost(vdev);
> > +			vhost_txq->len = 0;
> > +			vhost_txq->pre_tsc = cur_tsc;
> > +		}
> > +	}
> > +}
> > +
> >  /*
> >   * Check if the packet destination MAC address is for a local device. If so
> then put
> >   * the packet on that devices RX queue. If not then return.
> > @@ -850,7 +932,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct
> > rte_mbuf *m)  {
> >  	struct rte_ether_hdr *pkt_hdr;
> >  	struct vhost_dev *dst_vdev;
> > -
> > +	struct vhost_bufftable *vhost_txq;
> > +	uint16_t lcore_id = rte_lcore_id();
> >  	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> >
> >  	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr); @@ -873,7 +956,19
> @@
> > virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
> >  		return 0;
> >  	}
> >
> > -	virtio_xmit(dst_vdev, vdev, m);
> > +	vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE +
> dst_vdev->vid];
> > +	vhost_txq->m_table[vhost_txq->len++] = m;
> > +
> > +	if (enable_stats) {
> > +		vdev->stats.tx_total++;
> > +		vdev->stats.tx++;
> > +	}
> > +
> > +	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
> > +		drain_vhost(dst_vdev);
> > +		vhost_txq->len = 0;
> > +		vhost_txq->pre_tsc = rte_rdtsc();
> > +	}
> >  	return 0;
> >  }
> >
> > @@ -944,13 +1039,6 @@ static void virtio_tx_offload(struct rte_mbuf *m)
> >  	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);  }
> >
> > -static inline void
> > -free_pkts(struct rte_mbuf **pkts, uint16_t n) -{
> > -	while (n--)
> > -		rte_pktmbuf_free(pkts[n]);
> > -}
> > -
> >  static __rte_always_inline void
> >  do_drain_mbuf_table(struct mbuf_table *tx_q)  { @@ -983,16 +1071,14
> > @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m,
> > uint16_t vlan_tag)
> >
> >  		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry)
> {
> >  			if (vdev2 != vdev)
> > -				virtio_xmit(vdev2, vdev, m);
> > +				sync_virtio_xmit(vdev2, vdev, m);
> >  		}
> >  		goto queue2nic;
> >  	}
> >
> >  	/*check if destination is local VM*/
> > -	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev,
> m) == 0)) {
> > -		rte_pktmbuf_free(m);
> > +	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev,
> m) ==
> > +0))
> >  		return;
> > -	}
> >
> >  	if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
> >  		if (unlikely(find_local_dest(vdev, m, &offset, @@ -1077,32
> +1163,15
> > @@ drain_mbuf_table(struct mbuf_table *tx_q)
> >  	}
> >  }
> >
> > -static __rte_always_inline void
> > -complete_async_pkts(struct vhost_dev *vdev, uint16_t qid) -{
> > -	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
> > -	uint16_t complete_count;
> > -
> > -	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> > -						qid, p_cpl, MAX_PKT_BURST);
> > -	vdev->nr_async_pkts -= complete_count;
> > -	if (complete_count)
> > -		free_pkts(p_cpl, complete_count);
> > -}
> > -
> >  static __rte_always_inline void
> >  drain_eth_rx(struct vhost_dev *vdev)
> >  {
> >  	uint16_t rx_count, enqueue_count;
> > -	struct rte_mbuf *pkts[MAX_PKT_BURST],
> *comp_pkts[MAX_PKT_BURST];
> > -	uint32_t nr_comp = 0;
> > +	struct rte_mbuf *pkts[MAX_PKT_BURST];
> >
> >  	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
> >  				    pkts, MAX_PKT_BURST);
> >
> > -	while (likely(vdev->nr_async_pkts))
> > -		complete_async_pkts(vdev, VIRTIO_RXQ);
> > -
> >  	if (!rx_count)
> >  		return;
> >
> > @@ -1128,22 +1197,31 @@ drain_eth_rx(struct vhost_dev *vdev)
> >  		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
> >  						pkts, rx_count);
> >  	} else if (async_vhost_driver) {
> > +		uint32_t cpu_cpl_nr = 0;
> > +		uint16_t enqueue_fail = 0;
> > +		struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
> > +
> > +		complete_async_pkts(vdev);
> >  		enqueue_count = rte_vhost_submit_enqueue_burst(vdev-
> >vid,
> > -					VIRTIO_RXQ, pkts, rx_count,
> comp_pkts,
> > -					&nr_comp);
> > -		if (nr_comp > 0) {
> > -			free_pkts(comp_pkts, nr_comp);
> > -			enqueue_count -= nr_comp;
> > -		}
> > -		vdev->nr_async_pkts += enqueue_count;
> > +					VIRTIO_RXQ, pkts, rx_count,
> > +					m_cpu_cpl, &cpu_cpl_nr);
> > +		atomic_fetch_add(&vdev->nr_async_pkts,
> > +					enqueue_count - cpu_cpl_nr);
> > +		if (cpu_cpl_nr)
> > +			free_pkts(m_cpu_cpl, cpu_cpl_nr);
> > +
> > +		enqueue_fail = rx_count - enqueue_count;
> > +		if (enqueue_fail)
> > +			free_pkts(&pkts[enqueue_count], enqueue_fail);
> > +
> >  	} else {
> >  		enqueue_count = rte_vhost_enqueue_burst(vdev->vid,
> VIRTIO_RXQ,
> >  						pkts, rx_count);
> >  	}
> >
> >  	if (enable_stats) {
> > -		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
> > -		rte_atomic64_add(&vdev->stats.rx_atomic,
> enqueue_count);
> > +		atomic_fetch_add(&vdev->stats.rx_total_atomic, rx_count);
> > +		atomic_fetch_add(&vdev->stats.rx_atomic,
> enqueue_count);
> >  	}
> >
> >  	if (!async_vhost_driver)
> > @@ -1212,7 +1290,7 @@ switch_worker(void *arg __rte_unused)
> >
> >  	while(1) {
> >  		drain_mbuf_table(tx_q);
> > -
> > +		drain_vhost_table();
> >  		/*
> >  		 * Inform the configuration core that we have exited the
> >  		 * linked list and that no devices are in use if requested.
> > @@ -1253,6 +1331,7 @@ destroy_device(int vid)  {
> >  	struct vhost_dev *vdev = NULL;
> >  	int lcore;
> > +	uint16_t i;
> >
> >  	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
> >  		if (vdev->vid == vid)
> > @@ -1266,6 +1345,9 @@ destroy_device(int vid)
> >  		rte_pause();
> >  	}
> >
> > +	for (i = 0; i < RTE_MAX_LCORE; i++)
> > +		rte_free(vhost_txbuff[i * MAX_VHOST_DEVICE + vid]);
> > +
> >  	if (builtin_net_driver)
> >  		vs_vhost_net_remove(vdev);
> >
> > @@ -1308,6 +1390,7 @@ static int
> >  new_device(int vid)
> >  {
> >  	int lcore, core_add = 0;
> > +	uint16_t i;
> >  	uint32_t device_num_min = num_devices;
> >  	struct vhost_dev *vdev;
> >  	vdev = rte_zmalloc("vhost device", sizeof(*vdev),
> > RTE_CACHE_LINE_SIZE); @@ -1319,6 +1402,19 @@ new_device(int vid)
> >  	}
> >  	vdev->vid = vid;
> >
> > +	for (i = 0; i < RTE_MAX_LCORE; i++) {
> > +		vhost_txbuff[i * MAX_VHOST_DEVICE + vid]
> > +			= rte_zmalloc("vhost bufftable",
> > +				sizeof(struct vhost_bufftable),
> > +				RTE_CACHE_LINE_SIZE);
> > +
> > +		if (vhost_txbuff[i * MAX_VHOST_DEVICE + vid] == NULL) {
> > +			RTE_LOG(INFO, VHOST_DATA,
> > +			  "(%d) couldn't allocate memory for vhost TX\n", vid);
> > +			return -1;
> > +		}
> > +	}
> > +
> >  	if (builtin_net_driver)
> >  		vs_vhost_net_setup(vdev);
> >
> > @@ -1353,12 +1449,15 @@ new_device(int vid)
> >  	if (async_vhost_driver) {
> >  		struct rte_vhost_async_features f;
> >  		struct rte_vhost_async_channel_ops channel_ops;
> > +
> >  		if (strncmp(dma_type, "ioat", 4) == 0) {
> >  			channel_ops.transfer_data = ioat_transfer_data_cb;
> >  			channel_ops.check_completed_copies =
> >  				ioat_check_completed_copies_cb;
> > +
> >  			f.async_inorder = 1;
> >  			f.async_threshold = 256;
> > +
> >  			return rte_vhost_async_channel_register(vid,
> VIRTIO_RXQ,
> >  				f.intval, &channel_ops);
> >  		}
> > @@ -1402,8 +1501,8 @@ print_stats(__rte_unused void *arg)
> >  			tx         = vdev->stats.tx;
> >  			tx_dropped = tx_total - tx;
> >
> > -			rx_total   = rte_atomic64_read(&vdev-
> >stats.rx_total_atomic);
> > -			rx         = rte_atomic64_read(&vdev->stats.rx_atomic);
> > +			rx_total   = atomic_load(&vdev-
> >stats.rx_total_atomic);
> > +			rx         = atomic_load(&vdev->stats.rx_atomic);
> >  			rx_dropped = rx_total - rx;
> >
> >  			printf("Statistics for device %d\n"
> > @@ -1602,6 +1701,7 @@ main(int argc, char *argv[])
> >  	/* Register vhost user driver to handle vhost messages. */
> >  	for (i = 0; i < nb_sockets; i++) {
> >  		char *file = socket_files + i * PATH_MAX;
> > +
> >  		if (async_vhost_driver)
> >  			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
> >
> > diff --git a/examples/vhost/main.h b/examples/vhost/main.h index
> > 4317b6ae8..6aa798a3e 100644
> > --- a/examples/vhost/main.h
> > +++ b/examples/vhost/main.h
> > @@ -8,6 +8,7 @@
> >  #include <sys/queue.h>
> >
> >  #include <rte_ether.h>
> > +#include <stdatomic.h>
> >
> >  /* Macros for printing using RTE_LOG */  #define
> > RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 @@ -21,8 +22,8 @@
> enum
> > {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};  struct device_statistics {
> >  	uint64_t	tx;
> >  	uint64_t	tx_total;
> > -	rte_atomic64_t	rx_atomic;
> > -	rte_atomic64_t	rx_total_atomic;
> > +	atomic_int_least64_t	rx_atomic;
> > +	atomic_int_least64_t	rx_total_atomic;
> >  };
> >
> >  struct vhost_queue {
> > @@ -51,7 +52,7 @@ struct vhost_dev {
> >  	uint64_t features;
> >  	size_t hdr_len;
> >  	uint16_t nr_vrings;
> > -	uint16_t nr_async_pkts;
> > +	atomic_int_least16_t nr_async_pkts;
> >  	struct rte_vhost_memory *mem;
> >  	struct device_statistics stats;
> >  	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
> > --
> > 2.29.2
> >


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v9 1/2] examples/vhost: add ioat ring space count and check
  2021-01-12  4:38   ` [dpdk-dev] [PATCH v9 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
@ 2021-01-21 12:34     ` Maxime Coquelin
  2021-01-22  2:16       ` Jiang, Cheng1
  0 siblings, 1 reply; 44+ messages in thread
From: Maxime Coquelin @ 2021-01-21 12:34 UTC (permalink / raw)
  To: Cheng Jiang, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang



On 1/12/21 5:38 AM, Cheng Jiang wrote:
> Add ioat ring space count and check, if ioat ring space is not enough
> for the next async vhost packet enqueue, then just return to prevent
> enqueue failure. Add rte_ioat_completed_ops() fail handler.
> 
> Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
> Reviewed-by: Jiayu Hu <jiayu.hu@intel.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  examples/vhost/ioat.c | 24 +++++++++++++-----------
>  1 file changed, 13 insertions(+), 11 deletions(-)
> 
> diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
> index 71d8a1f1f..dbad28d43 100644
> --- a/examples/vhost/ioat.c
> +++ b/examples/vhost/ioat.c
> @@ -17,6 +17,7 @@ struct packet_tracker {
>  	unsigned short next_read;
>  	unsigned short next_write;
>  	unsigned short last_remain;
> +	unsigned short ioat_space;
>  };
> 
>  struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
> @@ -113,7 +114,7 @@ open_ioat(const char *value)
>  			goto out;
>  		}
>  		rte_rawdev_start(dev_id);
> -
> +		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
>  		dma_info->nr++;
>  		i++;
>  	}
> @@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
>  			src = descs[i_desc].src;
>  			dst = descs[i_desc].dst;
>  			i_seg = 0;
> +			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
> +				break;
>  			while (i_seg < src->nr_segs) {
> -				/*
> -				 * TODO: Assuming that the ring space of the
> -				 * IOAT device is large enough, so there is no
> -				 * error here, and the actual error handling
> -				 * will be added later.
> -				 */
>  				rte_ioat_enqueue_copy(dev_id,
>  					(uintptr_t)(src->iov[i_seg].iov_base)
>  						+ src->offset,
> @@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
>  				i_seg++;
>  			}
>  			write &= mask;
> -			cb_tracker[dev_id].size_track[write] = i_seg;
> +			cb_tracker[dev_id].size_track[write] = src->nr_segs;
> +			cb_tracker[dev_id].ioat_space -= src->nr_segs;
>  			write++;
>  		}
>  	} else {
> @@ -178,17 +176,21 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
>  {
>  	if (!opaque_data) {
>  		uintptr_t dump[255];
> -		unsigned short n_seg;
> +		int n_seg;
>  		unsigned short read, write;
>  		unsigned short nb_packet = 0;
>  		unsigned short mask = MAX_ENQUEUED_SIZE - 1;
>  		unsigned short i;
> +
>  		int dev_id = dma_bind[vid].dmas[queue_id * 2
>  				+ VIRTIO_RXQ].dev_id;
>  		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
> -		n_seg += cb_tracker[dev_id].last_remain;
> -		if (!n_seg)
> +		if (n_seg <= 0)
>  			return 0;

In a separate patch, it might make sense to propagate the error if
rte_ioat_completed_ops returns -1.

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Maxime
> +
> +		cb_tracker[dev_id].ioat_space += n_seg;
> +		n_seg += cb_tracker[dev_id].last_remain;
> +
>  		read = cb_tracker[dev_id].next_read;
>  		write = cb_tracker[dev_id].next_write;
>  		for (i = 0; i < max_packets; i++) {
> --
> 2.29.2
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v9 2/2] examples/vhost: refactor vhost data path
  2021-01-12  4:38   ` [dpdk-dev] [PATCH v9 2/2] examples/vhost: refactor vhost data path Cheng Jiang
@ 2021-01-21 12:39     ` Maxime Coquelin
  0 siblings, 0 replies; 44+ messages in thread
From: Maxime Coquelin @ 2021-01-21 12:39 UTC (permalink / raw)
  To: Cheng Jiang, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang



On 1/12/21 5:38 AM, Cheng Jiang wrote:
> Change the vm2vm data path to batch enqueue for better performance.
> Support latest async vhost API, refactor vhost async data path,
> replace rte_atomicNN_xxx to __atomic_XXX and clean some codes.
> 
> Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
> Reviewed-by: Jiayu Hu <jiayu.hu@intel.com>
> ---
>  examples/vhost/ioat.h |   2 +-
>  examples/vhost/main.c | 238 ++++++++++++++++++++++++++++++------------
>  examples/vhost/main.h |   6 +-
>  3 files changed, 178 insertions(+), 68 deletions(-)
> 


Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v9 1/2] examples/vhost: add ioat ring space count and check
  2021-01-21 12:34     ` Maxime Coquelin
@ 2021-01-22  2:16       ` Jiang, Cheng1
  0 siblings, 0 replies; 44+ messages in thread
From: Jiang, Cheng1 @ 2021-01-22  2:16 UTC (permalink / raw)
  To: Maxime Coquelin, Xia, Chenbo; +Cc: dev, Hu, Jiayu, Yang, YvonneX, Wang, Yinan

Hi,

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Thursday, January 21, 2021 8:35 PM
> To: Jiang, Cheng1 <cheng1.jiang@intel.com>; Xia, Chenbo
> <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> <yvonnex.yang@intel.com>; Wang, Yinan <yinan.wang@intel.com>
> Subject: Re: [PATCH v9 1/2] examples/vhost: add ioat ring space count and
> check
> 
> 
> 
> On 1/12/21 5:38 AM, Cheng Jiang wrote:
> > Add ioat ring space count and check, if ioat ring space is not enough
> > for the next async vhost packet enqueue, then just return to prevent
> > enqueue failure. Add rte_ioat_completed_ops() fail handler.
> >
> > Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
> > Reviewed-by: Jiayu Hu <jiayu.hu@intel.com>
> > Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > ---
> >  examples/vhost/ioat.c | 24 +++++++++++++-----------
> >  1 file changed, 13 insertions(+), 11 deletions(-)
> >
> > diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c index
> > 71d8a1f1f..dbad28d43 100644
> > --- a/examples/vhost/ioat.c
> > +++ b/examples/vhost/ioat.c
> > @@ -17,6 +17,7 @@ struct packet_tracker {
> >  	unsigned short next_read;
> >  	unsigned short next_write;
> >  	unsigned short last_remain;
> > +	unsigned short ioat_space;
> >  };
> >
> >  struct packet_tracker cb_tracker[MAX_VHOST_DEVICE]; @@ -113,7 +114,7
> > @@ open_ioat(const char *value)
> >  			goto out;
> >  		}
> >  		rte_rawdev_start(dev_id);
> > -
> > +		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
> >  		dma_info->nr++;
> >  		i++;
> >  	}
> > @@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
> >  			src = descs[i_desc].src;
> >  			dst = descs[i_desc].dst;
> >  			i_seg = 0;
> > +			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
> > +				break;
> >  			while (i_seg < src->nr_segs) {
> > -				/*
> > -				 * TODO: Assuming that the ring space of the
> > -				 * IOAT device is large enough, so there is no
> > -				 * error here, and the actual error handling
> > -				 * will be added later.
> > -				 */
> >  				rte_ioat_enqueue_copy(dev_id,
> >  					(uintptr_t)(src->iov[i_seg].iov_base)
> >  						+ src->offset,
> > @@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
> >  				i_seg++;
> >  			}
> >  			write &= mask;
> > -			cb_tracker[dev_id].size_track[write] = i_seg;
> > +			cb_tracker[dev_id].size_track[write] = src->nr_segs;
> > +			cb_tracker[dev_id].ioat_space -= src->nr_segs;
> >  			write++;
> >  		}
> >  	} else {
> > @@ -178,17 +176,21 @@ ioat_check_completed_copies_cb(int vid,
> uint16_t
> > queue_id,  {
> >  	if (!opaque_data) {
> >  		uintptr_t dump[255];
> > -		unsigned short n_seg;
> > +		int n_seg;
> >  		unsigned short read, write;
> >  		unsigned short nb_packet = 0;
> >  		unsigned short mask = MAX_ENQUEUED_SIZE - 1;
> >  		unsigned short i;
> > +
> >  		int dev_id = dma_bind[vid].dmas[queue_id * 2
> >  				+ VIRTIO_RXQ].dev_id;
> >  		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
> > -		n_seg += cb_tracker[dev_id].last_remain;
> > -		if (!n_seg)
> > +		if (n_seg <= 0)
> >  			return 0;
> 
> In a separate patch, it might make sense to propagate the error if
> rte_ioat_completed_ops returns -1.

Sure, I'll send a patch to fix it when this patch set is merged.

Thanks,
Cheng

> 
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> 
> Maxime
> > +
> > +		cb_tracker[dev_id].ioat_space += n_seg;
> > +		n_seg += cb_tracker[dev_id].last_remain;
> > +
> >  		read = cb_tracker[dev_id].next_read;
> >  		write = cb_tracker[dev_id].next_write;
> >  		for (i = 0; i < max_packets; i++) {
> > --
> > 2.29.2
> >


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v9 0/2] examples/vhost: sample code refactor
  2021-01-12  4:38 ` [dpdk-dev] [PATCH v9 0/2] examples/vhost: sample code refactor Cheng Jiang
  2021-01-12  4:38   ` [dpdk-dev] [PATCH v9 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
  2021-01-12  4:38   ` [dpdk-dev] [PATCH v9 2/2] examples/vhost: refactor vhost data path Cheng Jiang
@ 2021-01-27 12:00   ` Maxime Coquelin
  2 siblings, 0 replies; 44+ messages in thread
From: Maxime Coquelin @ 2021-01-27 12:00 UTC (permalink / raw)
  To: Cheng Jiang, chenbo.xia; +Cc: dev, Jiayu.Hu, YvonneX.Yang, yinan.wang



On 1/12/21 5:38 AM, Cheng Jiang wrote:
> Refactor the vhost sample code. Add ioat ring space count and check
> in ioat callback, optimize vhost data path for batch enqueue, replace
> rte_atomicNN_xxx to __atomic_XXX and refactor vhost async data path.
> ---
> v9:
>  * migrated atomic_XXX to __atomic_XXX and updated git log
> 
> v8:
>  * rebased codes
> 
> v7:
>  * fixed rte_ioat_completed_ops() fail handler issue
> 
> v6:
>  * adjusted the value of MAX_ENQUEUED_SIZE in ioat.h
> 
> v5:
>  * added vhost enqueue buffer free when destroy a vhost device
>  * added rte_ioat_completed_ops() fail handler
>  * changed the behavior of drain_vhost_table() function
>  * changed some variable names
>  * changed some variable definition
>  * added rte_zmalloc() fail handler
>  * added some comments
>  * fixed some typos
> 
> v4:
>  * improved code structure
>  * improved vhost enqueue buffer memory allocation
>  * cleaned some codes
> 
> v3:
>  * added some variable initialization
>  * cleaned some codes
> 
> v2:
>  * optimized patch structure
>  * optimized git log
>  * replaced rte_atomicNN_xxx to atomic_XXX
> 
> Cheng Jiang (2):
>   examples/vhost: add ioat ring space count and check
>   examples/vhost: refactor vhost data path
> 
>  examples/vhost/ioat.c |  24 +++--
>  examples/vhost/ioat.h |   2 +-
>  examples/vhost/main.c | 238 ++++++++++++++++++++++++++++++------------
>  examples/vhost/main.h |   6 +-
>  4 files changed, 191 insertions(+), 79 deletions(-)
> 
> --
> 2.29.2
> 


Applied to dpdk-next-virtio/main.

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2021-01-27 12:00 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-18 11:33 [dpdk-dev] [PATCH v1 0/3] examples/vhost: sample code refactor Cheng Jiang
2020-12-18 11:33 ` [dpdk-dev] [PATCH v1 1/3] examples/vhost: add ioat ring space count and check Cheng Jiang
2020-12-18 11:33 ` [dpdk-dev] [PATCH v1 2/3] examples/vhost: optimize vhost data path for batch Cheng Jiang
2020-12-18 11:33 ` [dpdk-dev] [PATCH v1 3/3] examples/vhost: refactor vhost async data path Cheng Jiang
2020-12-22  8:36 ` [dpdk-dev] [PATCH v2 0/2] examples/vhost: sample code refactor Cheng Jiang
2020-12-22  8:36   ` [dpdk-dev] [PATCH v2 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
2020-12-22  8:36   ` [dpdk-dev] [PATCH v2 2/2] examples/vhost: refactor vhost data path Cheng Jiang
2020-12-24  8:49 ` [dpdk-dev] [PATCH v3 0/2] examples/vhost: sample code refactor Cheng Jiang
2020-12-24  8:49   ` [dpdk-dev] [PATCH v3 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
2020-12-24  8:49   ` [dpdk-dev] [PATCH v3 2/2] examples/vhost: refactor vhost data path Cheng Jiang
2020-12-25  8:07 ` [dpdk-dev] [PATCH v4 0/2] examples/vhost: sample code refactor Cheng Jiang
2020-12-25  8:07   ` [dpdk-dev] [PATCH v4 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
2020-12-28  2:50     ` Hu, Jiayu
2020-12-28  8:08       ` Jiang, Cheng1
2020-12-25  8:07   ` [dpdk-dev] [PATCH v4 2/2] examples/vhost: refactor vhost data path Cheng Jiang
2020-12-28  4:03     ` Hu, Jiayu
2020-12-28  8:21       ` Jiang, Cheng1
2020-12-28  7:16 ` [dpdk-dev] [PATCH v5 0/2] examples/vhost: sample code refactor Cheng Jiang
2020-12-28  7:16   ` [dpdk-dev] [PATCH v5 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
2020-12-28  7:16   ` [dpdk-dev] [PATCH v5 2/2] examples/vhost: refactor vhost data path Cheng Jiang
2021-01-04  4:57 ` [dpdk-dev] [PATCH v6 0/2] examples/vhost: sample code refactor Cheng Jiang
2021-01-04  4:57   ` [dpdk-dev] [PATCH v6 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
2021-01-05  1:19     ` Hu, Jiayu
2021-01-05  1:51       ` Jiang, Cheng1
2021-01-04  4:57   ` [dpdk-dev] [PATCH v6 2/2] examples/vhost: refactor vhost data path Cheng Jiang
2021-01-05  1:43     ` Hu, Jiayu
2021-01-05  2:15 ` [dpdk-dev] [PATCH v7 0/2] examples/vhost: sample code refactor Cheng Jiang
2021-01-05  2:15   ` [dpdk-dev] [PATCH v7 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
2021-01-05  6:56     ` Hu, Jiayu
2021-01-05  2:15   ` [dpdk-dev] [PATCH v7 2/2] examples/vhost: refactor vhost data path Cheng Jiang
2021-01-06  7:47   ` [dpdk-dev] [PATCH v7 0/2] examples/vhost: sample code refactor Ling, WeiX
2021-01-11  5:52 ` [dpdk-dev] [PATCH v8 " Cheng Jiang
2021-01-11  5:52   ` [dpdk-dev] [PATCH v8 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
2021-01-11 14:15     ` Maxime Coquelin
2021-01-11  5:52   ` [dpdk-dev] [PATCH v8 2/2] examples/vhost: refactor vhost data path Cheng Jiang
2021-01-11 14:25     ` Maxime Coquelin
2021-01-12  4:51       ` Jiang, Cheng1
2021-01-12  4:38 ` [dpdk-dev] [PATCH v9 0/2] examples/vhost: sample code refactor Cheng Jiang
2021-01-12  4:38   ` [dpdk-dev] [PATCH v9 1/2] examples/vhost: add ioat ring space count and check Cheng Jiang
2021-01-21 12:34     ` Maxime Coquelin
2021-01-22  2:16       ` Jiang, Cheng1
2021-01-12  4:38   ` [dpdk-dev] [PATCH v9 2/2] examples/vhost: refactor vhost data path Cheng Jiang
2021-01-21 12:39     ` Maxime Coquelin
2021-01-27 12:00   ` [dpdk-dev] [PATCH v9 0/2] examples/vhost: sample code refactor Maxime Coquelin

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ http://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git