DPDK patches and discussions
 help / color / mirror / Atom feed
* Re: [dpdk-dev] [PATCH 1/2] net/virtio: make control queue thread-safe
  2017-11-24 11:03 ` [dpdk-dev] [PATCH 1/2] net/virtio: make control queue thread-safe Xiao Wang
@ 2017-11-24  5:38   ` Tiwei Bie
  2017-11-30  2:10     ` Wang, Xiao W
  2017-11-30  2:59   ` Stephen Hemminger
  2017-12-04 14:02   ` [dpdk-dev] [PATCH v2 0/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2 siblings, 1 reply; 112+ messages in thread
From: Tiwei Bie @ 2017-11-24  5:38 UTC (permalink / raw)
  To: Xiao Wang; +Cc: dev, yliu

On Fri, Nov 24, 2017 at 03:03:59AM -0800, Xiao Wang wrote:
[...]
> diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
> index 54f1e84..24e3026 100644
> --- a/drivers/net/virtio/virtio_rxtx.h
> +++ b/drivers/net/virtio/virtio_rxtx.h
> @@ -84,6 +84,7 @@ struct virtnet_ctl {
>  	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
>  	uint16_t port_id;               /**< Device port identifier. */
>  	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring. */
> +	rte_spinlock_t sl;				/**< spinlock for control queue. */

Please use spaces instead of (4 spaces) tabs between the code and comments.

Best regards,
Tiwei Bie

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net/virtio: support GUEST ANNOUNCE
  2017-11-24 11:04 ` [dpdk-dev] [PATCH 2/2] " Xiao Wang
@ 2017-11-24  6:04   ` Tiwei Bie
  2017-11-30  2:37     ` Wang, Xiao W
  2017-11-27 12:48   ` Yuanhan Liu
  1 sibling, 1 reply; 112+ messages in thread
From: Tiwei Bie @ 2017-11-24  6:04 UTC (permalink / raw)
  To: Xiao Wang; +Cc: dev, yliu

Hi,

Some quick comments. Will go through the whole patch later.

On Fri, Nov 24, 2017 at 03:04:00AM -0800, Xiao Wang wrote:
> When live migration is done, for the backup VM, either the virtio
> frontend or the vhost backend needs to send out gratuitous RARP packet
> to announce its new network location.
> 
> This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
> migration scenario where the vhost backend doesn't have the ability to
> generate RARP packet.
> 
> Brief introduction of the work flow:
> 1. QEMU finishes live migration, pokes the backup VM with an interrupt.
> 2. Virtio interrupt handler reads out the interrupt status value, and
>    realizes it needs to send out RARP packet to announce its location.
> 3. Pause device to stop worker thread touching the queues.
> 4. Inject a RARP packet into a Tx Queue.
> 5. Ack the interrupt via control queue.
> 6. Resume device to continue packet processing.
> 
> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> ---
>  drivers/net/virtio/virtio_ethdev.c | 131 ++++++++++++++++++++++++++++++++++++-
>  drivers/net/virtio/virtio_ethdev.h |   4 ++
>  drivers/net/virtio/virtio_pci.h    |   1 +
>  drivers/net/virtio/virtio_rxtx.c   |  81 +++++++++++++++++++++++
>  drivers/net/virtio/virtqueue.h     |  11 ++++
>  5 files changed, 226 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> index 1959b11..6eaea0e 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -48,6 +48,8 @@
>  #include <rte_pci.h>
>  #include <rte_bus_pci.h>
>  #include <rte_ether.h>
> +#include <rte_ip.h>
> +#include <rte_arp.h>
>  #include <rte_common.h>
>  #include <rte_errno.h>
>  #include <rte_cpuflags.h>
> @@ -55,6 +57,7 @@
>  #include <rte_memory.h>
>  #include <rte_eal.h>
>  #include <rte_dev.h>
> +#include <rte_cycles.h>
>  
>  #include "virtio_ethdev.h"
>  #include "virtio_pci.h"
> @@ -106,6 +109,13 @@ static int virtio_dev_queue_stats_mapping_set(
>  	uint8_t stat_idx,
>  	uint8_t is_rx);
>  
> +static int make_rarp_packet(struct rte_mbuf *rarp_mbuf,
> +		const struct ether_addr *mac);
> +static int virtio_dev_pause(struct rte_eth_dev *dev);
> +static void virtio_dev_resume(struct rte_eth_dev *dev);
> +static void generate_rarp(struct rte_eth_dev *dev);
> +static void virtnet_ack_link_announce(struct rte_eth_dev *dev);
> +
>  /*
>   * The set of PCI devices this driver supports
>   */
> @@ -1249,9 +1259,116 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
>  	return 0;
>  }
>  
> +#define RARP_PKT_SIZE	64
> +
> +static int
> +make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
> +{
> +	struct ether_hdr *eth_hdr;
> +	struct arp_hdr  *rarp;
> +
> +	if (rarp_mbuf->buf_len < RARP_PKT_SIZE) {
> +		PMD_DRV_LOG(ERR, "mbuf size too small %u (< %d)",
> +				rarp_mbuf->buf_len, RARP_PKT_SIZE);
> +		return -1;
> +	}
> +
> +	/* Ethernet header. */
> +	eth_hdr = rte_pktmbuf_mtod_offset(rarp_mbuf, struct ether_hdr *, 0);

You can use rte_pktmbuf_mtod() directly.

> +	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
> +	ether_addr_copy(mac, &eth_hdr->s_addr);
> +	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
> +
> +	/* RARP header. */
> +	rarp = (struct arp_hdr *)(eth_hdr + 1);
> +	rarp->arp_hrd = htons(ARP_HRD_ETHER);
> +	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
> +	rarp->arp_hln = ETHER_ADDR_LEN;
> +	rarp->arp_pln = 4;
> +	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
> +
> +	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
> +	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
> +	memset(&rarp->arp_data.arp_sip, 0x00, 4);
> +	memset(&rarp->arp_data.arp_tip, 0x00, 4);
> +
> +	rarp_mbuf->data_len = RARP_PKT_SIZE;
> +	rarp_mbuf->pkt_len = RARP_PKT_SIZE;
> +
> +	return 0;
> +}
> +
> +static int
> +virtio_dev_pause(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +
> +	if (hw->started == 0)
> +		return -1;
> +	hw->started = 0;
> +	/*
> +	 * Prevent the worker thread from touching queues to avoid condition,
> +	 * 1 ms should be enough for the ongoing Tx function to finish.
> +	 */
> +	rte_delay_ms(1);
> +	return 0;
> +}
> +
> +static void
> +virtio_dev_resume(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +
> +	hw->started = 1;
> +}
> +
> +static void
> +generate_rarp(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +	struct rte_mbuf *rarp_mbuf = NULL;
> +	struct virtnet_tx *txvq = dev->data->tx_queues[0];
> +	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
> +
> +	rarp_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
> +	if (rarp_mbuf == NULL) {
> +		PMD_DRV_LOG(ERR, "mbuf allocate failed");
> +		return;
> +	}
> +
> +	if (make_rarp_packet(rarp_mbuf, (struct ether_addr *)hw->mac_addr)) {
> +		rte_pktmbuf_free(rarp_mbuf);
> +		rarp_mbuf = NULL;
> +		return;
> +	}
> +
> +	/* If virtio port just stopped, no need to send RARP */
> +	if (virtio_dev_pause(dev) < -1)

You mean < 0?

> +		return;
> +
> +	virtio_inject_pkts(txvq, &rarp_mbuf, 1);
> +	/* Recover the stored hw status to let worker thread continue */
> +	virtio_dev_resume(dev);
> +}
> +
> +static void
> +virtnet_ack_link_announce(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +	struct virtio_pmd_ctrl ctrl;
> +	int len;
> +
> +	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
> +	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
> +	len = 0;
> +
> +	virtio_send_command(hw->cvq, &ctrl, &len, 0);
> +}
> +
>  /*
> - * Process Virtio Config changed interrupt and call the callback
> - * if link state changed.
> + * Process virtio config changed interrupt. Call the callback
> + * if link state changed; generate gratuitous RARP packet if
> + * the status indicates an ANNOUNCE.
>   */
>  void
>  virtio_interrupt_handler(void *param)
> @@ -1274,6 +1391,12 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
>  						      NULL, NULL);
>  	}
>  
> +	if (isr & VIRTIO_NET_S_ANNOUNCE) {
> +		rte_spinlock_lock(&hw->sl);
> +		generate_rarp(dev);
> +		virtnet_ack_link_announce(dev);
> +		rte_spinlock_unlock(&hw->sl);
> +	}
>  }
>  
>  /* set rx and tx handlers according to what is supported */
> @@ -1786,6 +1909,8 @@ static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev)
>  			return -EBUSY;
>  		}
>  
> +	rte_spinlock_init(&hw->sl);
> +
>  	hw->use_simple_rx = 1;
>  	hw->use_simple_tx = 1;
>  
> @@ -1952,12 +2077,14 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
>  
>  	PMD_INIT_LOG(DEBUG, "stop");
>  
> +	rte_spinlock_lock(&hw->sl);
>  	if (intr_conf->lsc || intr_conf->rxq)
>  		virtio_intr_disable(dev);
>  
>  	hw->started = 0;
>  	memset(&link, 0, sizeof(link));
>  	virtio_dev_atomic_write_link_status(dev, &link);
> +	rte_spinlock_unlock(&hw->sl);
>  }
>  
>  static int
> diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
> index 2039bc5..24271cb 100644
> --- a/drivers/net/virtio/virtio_ethdev.h
> +++ b/drivers/net/virtio/virtio_ethdev.h
> @@ -67,6 +67,7 @@
>  	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
>  	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
>  	 1u << VIRTIO_NET_F_MTU	| \
> +	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE  | \
>  	 1u << VIRTIO_RING_F_INDIRECT_DESC |    \
>  	 1ULL << VIRTIO_F_VERSION_1       |	\
>  	 1ULL << VIRTIO_F_IOMMU_PLATFORM)
> @@ -111,6 +112,9 @@ uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>  uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		uint16_t nb_pkts);
>  
> +uint16_t virtio_inject_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> +		uint16_t nb_pkts);
> +
>  uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>  		uint16_t nb_pkts);
>  
> diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
> index 3c5ce66..3cd367e 100644
> --- a/drivers/net/virtio/virtio_pci.h
> +++ b/drivers/net/virtio/virtio_pci.h
> @@ -270,6 +270,7 @@ struct virtio_hw {
>  	struct virtio_pci_common_cfg *common_cfg;
>  	struct virtio_net_config *dev_cfg;
>  	void	    *virtio_user_dev;
> +	rte_spinlock_t sl;

Need to add some detailed comments to describe what's
protected by this lock.

>  
>  	struct virtqueue **vqs;
>  };
> diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> index 6a24fde..7313bdd 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -1100,3 +1100,84 @@
>  
>  	return nb_tx;
>  }
> +
> +uint16_t
> +virtio_inject_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> +{
> +	struct virtnet_tx *txvq = tx_queue;
> +	struct virtqueue *vq = txvq->vq;
> +	struct virtio_hw *hw = vq->hw;
> +	uint16_t hdr_size = hw->vtnet_hdr_size;
> +	uint16_t nb_used, nb_tx = 0;
> +
> +	if (unlikely(nb_pkts < 1))
> +		return nb_pkts;
> +
> +	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
> +	nb_used = VIRTQUEUE_NUSED(vq);
> +
> +	virtio_rmb();
> +	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
> +		virtio_xmit_cleanup(vq, nb_used);
> +
> +	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
> +		struct rte_mbuf *txm = tx_pkts[nb_tx];
> +		int can_push = 0, use_indirect = 0, slots, need;
> +
> +		/* optimize ring usage */
> +		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
> +					vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
> +			rte_mbuf_refcnt_read(txm) == 1 &&
> +			RTE_MBUF_DIRECT(txm) &&
> +			txm->nb_segs == 1 &&
> +			rte_pktmbuf_headroom(txm) >= hdr_size &&
> +			rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
> +				__alignof__(struct virtio_net_hdr_mrg_rxbuf)))
> +			can_push = 1;
> +		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
> +			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
> +			use_indirect = 1;
> +
> +		/* How many main ring entries are needed to this Tx?
> +		 * any_layout => number of segments
> +		 * indirect   => 1
> +		 * default    => number of segments + 1
> +		 */
> +		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
> +		need = slots - vq->vq_free_cnt;
> +
> +		/* Positive value indicates it need free vring descriptors */
> +		if (unlikely(need > 0)) {
> +			nb_used = VIRTQUEUE_NUSED(vq);
> +			virtio_rmb();
> +			need = RTE_MIN(need, (int)nb_used);
> +
> +			virtio_xmit_cleanup(vq, need);
> +			need = slots - vq->vq_free_cnt;
> +			if (unlikely(need > 0)) {
> +				PMD_TX_LOG(ERR,
> +						"No free tx descriptors to transmit");
> +				break;
> +			}
> +		}
> +
> +		/* Enqueue Packet buffers */
> +		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);
> +
> +		txvq->stats.bytes += txm->pkt_len;
> +		virtio_update_packet_stats(&txvq->stats, txm);
> +	}
> +
> +	txvq->stats.packets += nb_tx;
> +
> +	if (likely(nb_tx)) {
> +		vq_update_avail_idx(vq);
> +
> +		if (unlikely(virtqueue_kick_prepare(vq))) {
> +			virtqueue_notify(vq);
> +			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
> +		}
> +	}
> +
> +	return nb_tx;
> +}

What's the difference between virtio_inject_pkts() and
virtio_xmit_pkts() except the latter will check hw->started?

Best regards,
Tiwei Bie

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH 0/2] net/virtio: support GUEST ANNOUNCE
@ 2017-11-24 11:03 Xiao Wang
  2017-11-24 11:03 ` [dpdk-dev] [PATCH 1/2] net/virtio: make control queue thread-safe Xiao Wang
  2017-11-24 11:04 ` [dpdk-dev] [PATCH 2/2] " Xiao Wang
  0 siblings, 2 replies; 112+ messages in thread
From: Xiao Wang @ 2017-11-24 11:03 UTC (permalink / raw)
  To: dev; +Cc: yliu, Xiao Wang

When live migration is finished, the backup VM needs to proactively announce
its new location. DPDK vhost has implemented VHOST_USER_PROTOCOL_F_RARP to
generate a RARP packet to the switch in the dequeue path. Another method is to
let the guest proactively send out a RARP packet using the
VIRTIO_NET_F_GUEST_ANNOUNCE feature.

This patch set enables this feature in the virtio PMD, so that a VM running the
virtio PMD can be migrated even when vhost does not support RARP generation.

Xiao Wang (2):
  net/virtio: make control queue thread-safe
  net/virtio: support GUEST ANNOUNCE

 drivers/net/virtio/virtio_ethdev.c | 138 ++++++++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h |   4 ++
 drivers/net/virtio/virtio_pci.h    |   1 +
 drivers/net/virtio/virtio_rxtx.c   |  82 ++++++++++++++++++++++
 drivers/net/virtio/virtio_rxtx.h   |   1 +
 drivers/net/virtio/virtqueue.h     |  11 +++
 6 files changed, 234 insertions(+), 3 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH 1/2] net/virtio: make control queue thread-safe
  2017-11-24 11:03 [dpdk-dev] [PATCH 0/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2017-11-24 11:03 ` Xiao Wang
  2017-11-24  5:38   ` Tiwei Bie
                     ` (2 more replies)
  2017-11-24 11:04 ` [dpdk-dev] [PATCH 2/2] " Xiao Wang
  1 sibling, 3 replies; 112+ messages in thread
From: Xiao Wang @ 2017-11-24 11:03 UTC (permalink / raw)
  To: dev; +Cc: yliu, Xiao Wang

The virtio_send_command function may be called from app's configuration
routine, but also from an interrupt handler called when live migration is
done on the backup side. So this patch makes control queue thread-safe
first.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 7 ++++++-
 drivers/net/virtio/virtio_rxtx.c   | 1 +
 drivers/net/virtio/virtio_rxtx.h   | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index e0328f6..1959b11 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -177,6 +177,8 @@ struct rte_virtio_xstats_name_off {
 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
 		return -1;
 	}
+
+	rte_spinlock_lock(&cvq->sl);
 	vq = cvq->vq;
 	head = vq->vq_desc_head_idx;
 
@@ -184,8 +186,10 @@ struct rte_virtio_xstats_name_off {
 		"vq->hw->cvq = %p vq = %p",
 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
 
-	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
+	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1)) {
+		rte_spinlock_unlock(&cvq->sl);
 		return -1;
+	}
 
 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
 		sizeof(struct virtio_pmd_ctrl));
@@ -261,6 +265,7 @@ struct rte_virtio_xstats_name_off {
 
 	result = cvq->virtio_net_hdr_mz->addr;
 
+	rte_spinlock_unlock(&cvq->sl);
 	return result->status;
 }
 
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 390c137..6a24fde 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -407,6 +407,7 @@
 	struct virtio_hw *hw = dev->data->dev_private;
 
 	if (hw->cvq && hw->cvq->vq) {
+		rte_spinlock_init(&hw->cvq->sl);
 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
 	}
 }
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 54f1e84..24e3026 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -84,6 +84,7 @@ struct virtnet_ctl {
 	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
 	uint16_t port_id;               /**< Device port identifier. */
 	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring. */
+	rte_spinlock_t sl;				/**< spinlock for control queue. */
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH 2/2] net/virtio: support GUEST ANNOUNCE
  2017-11-24 11:03 [dpdk-dev] [PATCH 0/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2017-11-24 11:03 ` [dpdk-dev] [PATCH 1/2] net/virtio: make control queue thread-safe Xiao Wang
@ 2017-11-24 11:04 ` Xiao Wang
  2017-11-24  6:04   ` Tiwei Bie
  2017-11-27 12:48   ` Yuanhan Liu
  1 sibling, 2 replies; 112+ messages in thread
From: Xiao Wang @ 2017-11-24 11:04 UTC (permalink / raw)
  To: dev; +Cc: yliu, Xiao Wang

When live migration is done, for the backup VM, either the virtio
frontend or the vhost backend needs to send out gratuitous RARP packet
to announce its new network location.

This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
migration scenario where the vhost backend doesn't have the ability to
generate RARP packet.

Brief introduction of the work flow:
1. QEMU finishes live migration, pokes the backup VM with an interrupt.
2. Virtio interrupt handler reads out the interrupt status value, and
   realizes it needs to send out RARP packet to announce its location.
3. Pause device to stop worker thread touching the queues.
4. Inject a RARP packet into a Tx Queue.
5. Ack the interrupt via control queue.
6. Resume device to continue packet processing.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 131 ++++++++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h |   4 ++
 drivers/net/virtio/virtio_pci.h    |   1 +
 drivers/net/virtio/virtio_rxtx.c   |  81 +++++++++++++++++++++++
 drivers/net/virtio/virtqueue.h     |  11 ++++
 5 files changed, 226 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 1959b11..6eaea0e 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -48,6 +48,8 @@
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_arp.h>
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_cpuflags.h>
@@ -55,6 +57,7 @@
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_dev.h>
+#include <rte_cycles.h>
 
 #include "virtio_ethdev.h"
 #include "virtio_pci.h"
@@ -106,6 +109,13 @@ static int virtio_dev_queue_stats_mapping_set(
 	uint8_t stat_idx,
 	uint8_t is_rx);
 
+static int make_rarp_packet(struct rte_mbuf *rarp_mbuf,
+		const struct ether_addr *mac);
+static int virtio_dev_pause(struct rte_eth_dev *dev);
+static void virtio_dev_resume(struct rte_eth_dev *dev);
+static void generate_rarp(struct rte_eth_dev *dev);
+static void virtnet_ack_link_announce(struct rte_eth_dev *dev);
+
 /*
  * The set of PCI devices this driver supports
  */
@@ -1249,9 +1259,116 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
 	return 0;
 }
 
+#define RARP_PKT_SIZE	64
+
+static int
+make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
+{
+	struct ether_hdr *eth_hdr;
+	struct arp_hdr  *rarp;
+
+	if (rarp_mbuf->buf_len < RARP_PKT_SIZE) {
+		PMD_DRV_LOG(ERR, "mbuf size too small %u (< %d)",
+				rarp_mbuf->buf_len, RARP_PKT_SIZE);
+		return -1;
+	}
+
+	/* Ethernet header. */
+	eth_hdr = rte_pktmbuf_mtod_offset(rarp_mbuf, struct ether_hdr *, 0);
+	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
+	ether_addr_copy(mac, &eth_hdr->s_addr);
+	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
+
+	/* RARP header. */
+	rarp = (struct arp_hdr *)(eth_hdr + 1);
+	rarp->arp_hrd = htons(ARP_HRD_ETHER);
+	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
+	rarp->arp_hln = ETHER_ADDR_LEN;
+	rarp->arp_pln = 4;
+	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
+
+	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
+	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
+	memset(&rarp->arp_data.arp_sip, 0x00, 4);
+	memset(&rarp->arp_data.arp_tip, 0x00, 4);
+
+	rarp_mbuf->data_len = RARP_PKT_SIZE;
+	rarp_mbuf->pkt_len = RARP_PKT_SIZE;
+
+	return 0;
+}
+
+static int
+virtio_dev_pause(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	if (hw->started == 0)
+		return -1;
+	hw->started = 0;
+	/*
+	 * Prevent the worker thread from touching queues to avoid condition,
+	 * 1 ms should be enough for the ongoing Tx function to finish.
+	 */
+	rte_delay_ms(1);
+	return 0;
+}
+
+static void
+virtio_dev_resume(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	hw->started = 1;
+}
+
+static void
+generate_rarp(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct rte_mbuf *rarp_mbuf = NULL;
+	struct virtnet_tx *txvq = dev->data->tx_queues[0];
+	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
+
+	rarp_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
+	if (rarp_mbuf == NULL) {
+		PMD_DRV_LOG(ERR, "mbuf allocate failed");
+		return;
+	}
+
+	if (make_rarp_packet(rarp_mbuf, (struct ether_addr *)hw->mac_addr)) {
+		rte_pktmbuf_free(rarp_mbuf);
+		rarp_mbuf = NULL;
+		return;
+	}
+
+	/* If virtio port just stopped, no need to send RARP */
+	if (virtio_dev_pause(dev) < -1)
+		return;
+
+	virtio_inject_pkts(txvq, &rarp_mbuf, 1);
+	/* Recover the stored hw status to let worker thread continue */
+	virtio_dev_resume(dev);
+}
+
+static void
+virtnet_ack_link_announce(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+	int len;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
+	len = 0;
+
+	virtio_send_command(hw->cvq, &ctrl, &len, 0);
+}
+
 /*
- * Process Virtio Config changed interrupt and call the callback
- * if link state changed.
+ * Process virtio config changed interrupt. Call the callback
+ * if link state changed; generate gratuitous RARP packet if
+ * the status indicates an ANNOUNCE.
  */
 void
 virtio_interrupt_handler(void *param)
@@ -1274,6 +1391,12 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
 						      NULL, NULL);
 	}
 
+	if (isr & VIRTIO_NET_S_ANNOUNCE) {
+		rte_spinlock_lock(&hw->sl);
+		generate_rarp(dev);
+		virtnet_ack_link_announce(dev);
+		rte_spinlock_unlock(&hw->sl);
+	}
 }
 
 /* set rx and tx handlers according to what is supported */
@@ -1786,6 +1909,8 @@ static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev)
 			return -EBUSY;
 		}
 
+	rte_spinlock_init(&hw->sl);
+
 	hw->use_simple_rx = 1;
 	hw->use_simple_tx = 1;
 
@@ -1952,12 +2077,14 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
+	rte_spinlock_lock(&hw->sl);
 	if (intr_conf->lsc || intr_conf->rxq)
 		virtio_intr_disable(dev);
 
 	hw->started = 0;
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
+	rte_spinlock_unlock(&hw->sl);
 }
 
 static int
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 2039bc5..24271cb 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -67,6 +67,7 @@
 	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
 	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
 	 1u << VIRTIO_NET_F_MTU	| \
+	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE  | \
 	 1u << VIRTIO_RING_F_INDIRECT_DESC |    \
 	 1ULL << VIRTIO_F_VERSION_1       |	\
 	 1ULL << VIRTIO_F_IOMMU_PLATFORM)
@@ -111,6 +112,9 @@ uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
 
+uint16_t virtio_inject_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts);
+
 uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts);
 
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 3c5ce66..3cd367e 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -270,6 +270,7 @@ struct virtio_hw {
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	void	    *virtio_user_dev;
+	rte_spinlock_t sl;
 
 	struct virtqueue **vqs;
 };
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 6a24fde..7313bdd 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -1100,3 +1100,84 @@
 
 	return nb_tx;
 }
+
+uint16_t
+virtio_inject_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct virtnet_tx *txvq = tx_queue;
+	struct virtqueue *vq = txvq->vq;
+	struct virtio_hw *hw = vq->hw;
+	uint16_t hdr_size = hw->vtnet_hdr_size;
+	uint16_t nb_used, nb_tx = 0;
+
+	if (unlikely(nb_pkts < 1))
+		return nb_pkts;
+
+	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
+	nb_used = VIRTQUEUE_NUSED(vq);
+
+	virtio_rmb();
+	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
+		virtio_xmit_cleanup(vq, nb_used);
+
+	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+		struct rte_mbuf *txm = tx_pkts[nb_tx];
+		int can_push = 0, use_indirect = 0, slots, need;
+
+		/* optimize ring usage */
+		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
+					vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
+			rte_mbuf_refcnt_read(txm) == 1 &&
+			RTE_MBUF_DIRECT(txm) &&
+			txm->nb_segs == 1 &&
+			rte_pktmbuf_headroom(txm) >= hdr_size &&
+			rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
+				__alignof__(struct virtio_net_hdr_mrg_rxbuf)))
+			can_push = 1;
+		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
+			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
+			use_indirect = 1;
+
+		/* How many main ring entries are needed to this Tx?
+		 * any_layout => number of segments
+		 * indirect   => 1
+		 * default    => number of segments + 1
+		 */
+		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
+		need = slots - vq->vq_free_cnt;
+
+		/* Positive value indicates it need free vring descriptors */
+		if (unlikely(need > 0)) {
+			nb_used = VIRTQUEUE_NUSED(vq);
+			virtio_rmb();
+			need = RTE_MIN(need, (int)nb_used);
+
+			virtio_xmit_cleanup(vq, need);
+			need = slots - vq->vq_free_cnt;
+			if (unlikely(need > 0)) {
+				PMD_TX_LOG(ERR,
+						"No free tx descriptors to transmit");
+				break;
+			}
+		}
+
+		/* Enqueue Packet buffers */
+		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);
+
+		txvq->stats.bytes += txm->pkt_len;
+		virtio_update_packet_stats(&txvq->stats, txm);
+	}
+
+	txvq->stats.packets += nb_tx;
+
+	if (likely(nb_tx)) {
+		vq_update_avail_idx(vq);
+
+		if (unlikely(virtqueue_kick_prepare(vq))) {
+			virtqueue_notify(vq);
+			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
+		}
+	}
+
+	return nb_tx;
+}
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 2305d91..ed420e9 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -158,6 +158,17 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 #define VIRTIO_NET_CTRL_VLAN_DEL 1
 
+/*
+ * Control link announce acknowledgement
+ *
+ * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
+ * driver has received the notification; device would clear the
+ * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
+ * this command.
+ */
+#define VIRTIO_NET_CTRL_ANNOUNCE       3
+#define VIRTIO_NET_CTRL_ANNOUNCE_ACK         0
+
 struct virtio_net_ctrl_hdr {
 	uint8_t class;
 	uint8_t cmd;
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net/virtio: support GUEST ANNOUNCE
  2017-11-24 11:04 ` [dpdk-dev] [PATCH 2/2] " Xiao Wang
  2017-11-24  6:04   ` Tiwei Bie
@ 2017-11-27 12:48   ` Yuanhan Liu
  2017-11-30  2:41     ` Wang, Xiao W
  1 sibling, 1 reply; 112+ messages in thread
From: Yuanhan Liu @ 2017-11-27 12:48 UTC (permalink / raw)
  To: Xiao Wang; +Cc: dev

On Fri, Nov 24, 2017 at 03:04:00AM -0800, Xiao Wang wrote:
> When live migration is done, for the backup VM, either the virtio
> frontend or the vhost backend needs to send out gratuitous RARP packet
> to announce its new network location.
> 
> This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
> migration scenario where the vhost backend doesn't have the ability to
> generate RARP packet.

Yes, it's a feature good to have.

> +static int
> +virtio_dev_pause(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +
> +	if (hw->started == 0)
> +		return -1;
> +	hw->started = 0;
> +	/*
> +	 * Prevent the worker thread from touching queues to avoid condition,
> +	 * 1 ms should be enough for the ongoing Tx function to finish.
> +	 */
> +	rte_delay_ms(1);
> +	return 0;
> +}
> +
> +static void
> +virtio_dev_resume(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +
> +	hw->started = 1;
> +}

However, the implementation (stop first, pause for 1ms, duplicate another
Tx function, resume) doesn't seem elegant.

You probably could try something like DPDK vhost does:

- set a flag when S_ANNOUNCE is received
- inject a pkt when such flag is set in the xmit function

You should then be able to get rid of all of the above.

	--yliu

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/virtio: make control queue thread-safe
  2017-11-24  5:38   ` Tiwei Bie
@ 2017-11-30  2:10     ` Wang, Xiao W
  0 siblings, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2017-11-30  2:10 UTC (permalink / raw)
  To: Bie, Tiwei; +Cc: dev, yliu



> -----Original Message-----
> From: Bie, Tiwei
> Sent: Friday, November 24, 2017 1:39 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: dev@dpdk.org; yliu@fridaylinux.org
> Subject: Re: [dpdk-dev] [PATCH 1/2] net/virtio: make control queue thread-
> safe
> 
> On Fri, Nov 24, 2017 at 03:03:59AM -0800, Xiao Wang wrote:
> [...]
> > diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
> > index 54f1e84..24e3026 100644
> > --- a/drivers/net/virtio/virtio_rxtx.h
> > +++ b/drivers/net/virtio/virtio_rxtx.h
> > @@ -84,6 +84,7 @@ struct virtnet_ctl {
> >  	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
> >  	uint16_t port_id;               /**< Device port identifier. */
> >  	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring.
> */
> > +	rte_spinlock_t sl;				/**< spinlock for
> control queue. */
> 
> Please use spaces instead of (4 spaces) tabs between the code and comments.

Will change it in v2, thanks!

BRs,
Xiao
> 
> Best regards,
> Tiwei Bie

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net/virtio: support GUEST ANNOUNCE
  2017-11-24  6:04   ` Tiwei Bie
@ 2017-11-30  2:37     ` Wang, Xiao W
  0 siblings, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2017-11-30  2:37 UTC (permalink / raw)
  To: Bie, Tiwei; +Cc: dev, yliu



> -----Original Message-----
> From: Bie, Tiwei
> Sent: Friday, November 24, 2017 2:05 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: dev@dpdk.org; yliu@fridaylinux.org
> Subject: Re: [dpdk-dev] [PATCH 2/2] net/virtio: support GUEST ANNOUNCE
> 
> Hi,
> 
> Some quick comments. Will go through the whole patch later.
> 
> On Fri, Nov 24, 2017 at 03:04:00AM -0800, Xiao Wang wrote:
> > When live migration is done, for the backup VM, either the virtio
> > frontend or the vhost backend needs to send out gratuitous RARP packet
> > to announce its new network location.
> >
> > This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support
> live
> > migration scenario where the vhost backend doesn't have the ability to
> > generate RARP packet.
> >
> > Brief introduction of the work flow:
> > 1. QEMU finishes live migration, pokes the backup VM with an interrupt.
> > 2. Virtio interrupt handler reads out the interrupt status value, and
> >    realizes it needs to send out RARP packet to announce its location.
> > 3. Pause device to stop worker thread touching the queues.
> > 4. Inject a RARP packet into a Tx Queue.
> > 5. Ack the interrupt via control queue.
> > 6. Resume device to continue packet processing.
> >
> > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > ---
> >  drivers/net/virtio/virtio_ethdev.c | 131
> ++++++++++++++++++++++++++++++++++++-
> >  drivers/net/virtio/virtio_ethdev.h |   4 ++
> >  drivers/net/virtio/virtio_pci.h    |   1 +
> >  drivers/net/virtio/virtio_rxtx.c   |  81 +++++++++++++++++++++++
> >  drivers/net/virtio/virtqueue.h     |  11 ++++
> >  5 files changed, 226 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> > index 1959b11..6eaea0e 100644
> > --- a/drivers/net/virtio/virtio_ethdev.c
> > +++ b/drivers/net/virtio/virtio_ethdev.c
> > @@ -48,6 +48,8 @@
> >  #include <rte_pci.h>
> >  #include <rte_bus_pci.h>
> >  #include <rte_ether.h>
> > +#include <rte_ip.h>
> > +#include <rte_arp.h>
> >  #include <rte_common.h>
> >  #include <rte_errno.h>
> >  #include <rte_cpuflags.h>
> > @@ -55,6 +57,7 @@
> >  #include <rte_memory.h>
> >  #include <rte_eal.h>
> >  #include <rte_dev.h>
> > +#include <rte_cycles.h>
> >
> >  #include "virtio_ethdev.h"
> >  #include "virtio_pci.h"
> > @@ -106,6 +109,13 @@ static int virtio_dev_queue_stats_mapping_set(
> >  	uint8_t stat_idx,
> >  	uint8_t is_rx);
> >
> > +static int make_rarp_packet(struct rte_mbuf *rarp_mbuf,
> > +		const struct ether_addr *mac);
> > +static int virtio_dev_pause(struct rte_eth_dev *dev);
> > +static void virtio_dev_resume(struct rte_eth_dev *dev);
> > +static void generate_rarp(struct rte_eth_dev *dev);
> > +static void virtnet_ack_link_announce(struct rte_eth_dev *dev);
> > +
> >  /*
> >   * The set of PCI devices this driver supports
> >   */
> > @@ -1249,9 +1259,116 @@ static int virtio_dev_xstats_get_names(struct
> rte_eth_dev *dev,
> >  	return 0;
> >  }
> >
> > +#define RARP_PKT_SIZE	64
> > +
> > +static int
> > +make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr
> *mac)
> > +{
> > +	struct ether_hdr *eth_hdr;
> > +	struct arp_hdr  *rarp;
> > +
> > +	if (rarp_mbuf->buf_len < RARP_PKT_SIZE) {
> > +		PMD_DRV_LOG(ERR, "mbuf size too small %u (< %d)",
> > +				rarp_mbuf->buf_len, RARP_PKT_SIZE);
> > +		return -1;
> > +	}
> > +
> > +	/* Ethernet header. */
> > +	eth_hdr = rte_pktmbuf_mtod_offset(rarp_mbuf, struct ether_hdr *, 0);
> 
> You can use rte_pktmbuf_mtod() directly.

Looks better. Will change it in v2.

> 
> > +	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
> > +	ether_addr_copy(mac, &eth_hdr->s_addr);
> > +	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
> > +
> > +	/* RARP header. */
> > +	rarp = (struct arp_hdr *)(eth_hdr + 1);
> > +	rarp->arp_hrd = htons(ARP_HRD_ETHER);
> > +	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
> > +	rarp->arp_hln = ETHER_ADDR_LEN;
> > +	rarp->arp_pln = 4;
> > +	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
> > +
> > +	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
> > +	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
> > +	memset(&rarp->arp_data.arp_sip, 0x00, 4);
> > +	memset(&rarp->arp_data.arp_tip, 0x00, 4);
> > +
> > +	rarp_mbuf->data_len = RARP_PKT_SIZE;
> > +	rarp_mbuf->pkt_len = RARP_PKT_SIZE;
> > +
> > +	return 0;
> > +}
> > +
> > +static int
> > +virtio_dev_pause(struct rte_eth_dev *dev)
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +
> > +	if (hw->started == 0)
> > +		return -1;
> > +	hw->started = 0;
> > +	/*
> > +	 * Prevent the worker thread from touching queues to avoid condition,
> > +	 * 1 ms should be enough for the ongoing Tx function to finish.
> > +	 */
> > +	rte_delay_ms(1);
> > +	return 0;
> > +}
> > +
> > +static void
> > +virtio_dev_resume(struct rte_eth_dev *dev)
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +
> > +	hw->started = 1;
> > +}
> > +
> > +static void
> > +generate_rarp(struct rte_eth_dev *dev)
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +	struct rte_mbuf *rarp_mbuf = NULL;
> > +	struct virtnet_tx *txvq = dev->data->tx_queues[0];
> > +	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
> > +
> > +	rarp_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
> > +	if (rarp_mbuf == NULL) {
> > +		PMD_DRV_LOG(ERR, "mbuf allocate failed");
> > +		return;
> > +	}
> > +
> > +	if (make_rarp_packet(rarp_mbuf, (struct ether_addr *)hw->mac_addr))
> {
> > +		rte_pktmbuf_free(rarp_mbuf);
> > +		rarp_mbuf = NULL;
> > +		return;
> > +	}
> > +
> > +	/* If virtio port just stopped, no need to send RARP */
> > +	if (virtio_dev_pause(dev) < -1)
> 
> You mean < 0?

Yes, will fix it in v2.

> 
> > +		return;
> > +
> > +	virtio_inject_pkts(txvq, &rarp_mbuf, 1);
> > +	/* Recover the stored hw status to let worker thread continue */
> > +	virtio_dev_resume(dev);
> > +}
> > +
> > +static void
> > +virtnet_ack_link_announce(struct rte_eth_dev *dev)
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +	struct virtio_pmd_ctrl ctrl;
> > +	int len;
> > +
> > +	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
> > +	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
> > +	len = 0;
> > +
> > +	virtio_send_command(hw->cvq, &ctrl, &len, 0);
> > +}
> > +
> >  /*
> > - * Process Virtio Config changed interrupt and call the callback
> > - * if link state changed.
> > + * Process virtio config changed interrupt. Call the callback
> > + * if link state changed; generate gratuitous RARP packet if
> > + * the status indicates an ANNOUNCE.
> >   */
> >  void
> >  virtio_interrupt_handler(void *param)
> > @@ -1274,6 +1391,12 @@ static int virtio_dev_xstats_get_names(struct
> rte_eth_dev *dev,
> >  						      NULL, NULL);
> >  	}
> >
> > +	if (isr & VIRTIO_NET_S_ANNOUNCE) {
> > +		rte_spinlock_lock(&hw->sl);
> > +		generate_rarp(dev);
> > +		virtnet_ack_link_announce(dev);
> > +		rte_spinlock_unlock(&hw->sl);
> > +	}
> >  }
> >
> >  /* set rx and tx handlers according to what is supported */
> > @@ -1786,6 +1909,8 @@ static int eth_virtio_pci_remove(struct
> rte_pci_device *pci_dev)
> >  			return -EBUSY;
> >  		}
> >
> > +	rte_spinlock_init(&hw->sl);
> > +
> >  	hw->use_simple_rx = 1;
> >  	hw->use_simple_tx = 1;
> >
> > @@ -1952,12 +2077,14 @@ static void virtio_dev_free_mbufs(struct
> rte_eth_dev *dev)
> >
> >  	PMD_INIT_LOG(DEBUG, "stop");
> >
> > +	rte_spinlock_lock(&hw->sl);
> >  	if (intr_conf->lsc || intr_conf->rxq)
> >  		virtio_intr_disable(dev);
> >
> >  	hw->started = 0;
> >  	memset(&link, 0, sizeof(link));
> >  	virtio_dev_atomic_write_link_status(dev, &link);
> > +	rte_spinlock_unlock(&hw->sl);
> >  }
> >
> >  static int
> > diff --git a/drivers/net/virtio/virtio_ethdev.h
> b/drivers/net/virtio/virtio_ethdev.h
> > index 2039bc5..24271cb 100644
> > --- a/drivers/net/virtio/virtio_ethdev.h
> > +++ b/drivers/net/virtio/virtio_ethdev.h
> > @@ -67,6 +67,7 @@
> >  	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
> >  	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
> >  	 1u << VIRTIO_NET_F_MTU	| \
> > +	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE  | \
> >  	 1u << VIRTIO_RING_F_INDIRECT_DESC |    \
> >  	 1ULL << VIRTIO_F_VERSION_1       |	\
> >  	 1ULL << VIRTIO_F_IOMMU_PLATFORM)
> > @@ -111,6 +112,9 @@ uint16_t virtio_recv_mergeable_pkts(void *rx_queue,
> struct rte_mbuf **rx_pkts,
> >  uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> >  		uint16_t nb_pkts);
> >
> > +uint16_t virtio_inject_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> > +		uint16_t nb_pkts);
> > +
> >  uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> >  		uint16_t nb_pkts);
> >
> > diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
> > index 3c5ce66..3cd367e 100644
> > --- a/drivers/net/virtio/virtio_pci.h
> > +++ b/drivers/net/virtio/virtio_pci.h
> > @@ -270,6 +270,7 @@ struct virtio_hw {
> >  	struct virtio_pci_common_cfg *common_cfg;
> >  	struct virtio_net_config *dev_cfg;
> >  	void	    *virtio_user_dev;
> > +	rte_spinlock_t sl;
> 
> Need to add some detailed comments to describe what's
> protected by this lock.

With this feature, the hw->started flag can be changed by two threads:
the app management thread and the interrupt handler thread.
This lock prevents the following race:

1. When LM is done, a config change interrupt is triggered and the handler pauses the device: hw->started = 0.
2. The app stops the virtio port: hw->started = 0.
3. The RARP is injected into the Tx queue, the ack is sent on the control queue, and the device is resumed: hw->started = 1 (but the port is already stopped).

> 
> >
> >  	struct virtqueue **vqs;
> >  };
> > diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> > index 6a24fde..7313bdd 100644
> > --- a/drivers/net/virtio/virtio_rxtx.c
> > +++ b/drivers/net/virtio/virtio_rxtx.c
> > @@ -1100,3 +1100,84 @@
> >
> >  	return nb_tx;
> >  }
> > +
> > +uint16_t
> > +virtio_inject_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t
> nb_pkts)
> > +{
> > +	struct virtnet_tx *txvq = tx_queue;
> > +	struct virtqueue *vq = txvq->vq;
> > +	struct virtio_hw *hw = vq->hw;
> > +	uint16_t hdr_size = hw->vtnet_hdr_size;
> > +	uint16_t nb_used, nb_tx = 0;
> > +
> > +	if (unlikely(nb_pkts < 1))
> > +		return nb_pkts;
> > +
> > +	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
> > +	nb_used = VIRTQUEUE_NUSED(vq);
> > +
> > +	virtio_rmb();
> > +	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
> > +		virtio_xmit_cleanup(vq, nb_used);
> > +
> > +	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
> > +		struct rte_mbuf *txm = tx_pkts[nb_tx];
> > +		int can_push = 0, use_indirect = 0, slots, need;
> > +
> > +		/* optimize ring usage */
> > +		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
> > +					vtpci_with_feature(hw,
> VIRTIO_F_VERSION_1)) &&
> > +			rte_mbuf_refcnt_read(txm) == 1 &&
> > +			RTE_MBUF_DIRECT(txm) &&
> > +			txm->nb_segs == 1 &&
> > +			rte_pktmbuf_headroom(txm) >= hdr_size &&
> > +			rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
> > +				__alignof__(struct virtio_net_hdr_mrg_rxbuf)))
> > +			can_push = 1;
> > +		else if (vtpci_with_feature(hw,
> VIRTIO_RING_F_INDIRECT_DESC) &&
> > +			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
> > +			use_indirect = 1;
> > +
> > +		/* How many main ring entries are needed to this Tx?
> > +		 * any_layout => number of segments
> > +		 * indirect   => 1
> > +		 * default    => number of segments + 1
> > +		 */
> > +		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
> > +		need = slots - vq->vq_free_cnt;
> > +
> > +		/* Positive value indicates it need free vring descriptors */
> > +		if (unlikely(need > 0)) {
> > +			nb_used = VIRTQUEUE_NUSED(vq);
> > +			virtio_rmb();
> > +			need = RTE_MIN(need, (int)nb_used);
> > +
> > +			virtio_xmit_cleanup(vq, need);
> > +			need = slots - vq->vq_free_cnt;
> > +			if (unlikely(need > 0)) {
> > +				PMD_TX_LOG(ERR,
> > +						"No free tx descriptors to
> transmit");
> > +				break;
> > +			}
> > +		}
> > +
> > +		/* Enqueue Packet buffers */
> > +		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
> can_push);
> > +
> > +		txvq->stats.bytes += txm->pkt_len;
> > +		virtio_update_packet_stats(&txvq->stats, txm);
> > +	}
> > +
> > +	txvq->stats.packets += nb_tx;
> > +
> > +	if (likely(nb_tx)) {
> > +		vq_update_avail_idx(vq);
> > +
> > +		if (unlikely(virtqueue_kick_prepare(vq))) {
> > +			virtqueue_notify(vq);
> > +			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
> > +		}
> > +	}
> > +
> > +	return nb_tx;
> > +}
> 
> What's the difference between virtio_inject_pkts() and
> virtio_xmit_pkts() except the latter will check hw->started?

No vlan tag insertion.
Actually they are both using virtqueue_enqueue_xmit() to enqueue packet.

Thanks for your comments.
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net/virtio: support GUEST ANNOUNCE
  2017-11-27 12:48   ` Yuanhan Liu
@ 2017-11-30  2:41     ` Wang, Xiao W
  2017-12-05 14:26       ` Yuanhan Liu
  0 siblings, 1 reply; 112+ messages in thread
From: Wang, Xiao W @ 2017-11-30  2:41 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, Bie, Tiwei



> -----Original Message-----
> From: Yuanhan Liu [mailto:yliu@fridaylinux.org]
> Sent: Monday, November 27, 2017 8:49 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [PATCH 2/2] net/virtio: support GUEST ANNOUNCE
> 
> On Fri, Nov 24, 2017 at 03:04:00AM -0800, Xiao Wang wrote:
> > When live migration is done, for the backup VM, either the virtio
> > frontend or the vhost backend needs to send out gratuitous RARP packet
> > to announce its new network location.
> >
> > This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support
> live
> > migration scenario where the vhost backend doesn't have the ability to
> > generate RARP packet.
> 
> Yes, it's a feature good to have.
> 
> > +static int
> > +virtio_dev_pause(struct rte_eth_dev *dev)
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +
> > +	if (hw->started == 0)
> > +		return -1;
> > +	hw->started = 0;
> > +	/*
> > +	 * Prevent the worker thread from touching queues to avoid condition,
> > +	 * 1 ms should be enough for the ongoing Tx function to finish.
> > +	 */
> > +	rte_delay_ms(1);
> > +	return 0;
> > +}
> > +
> > +static void
> > +virtio_dev_resume(struct rte_eth_dev *dev)
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +
> > +	hw->started = 1;
> > +}
> 
> However, the implementation (stop first, pause for 1ms, duplicate another
> Tx function, resume) doesn't seem elegant.
> 
> You probably could try something like DPDK vhost does:
> 
> - set a flag when S_ANNOUNCE is received
> - inject a pkt when such flag is set in the xmit function
> 
> You should then be able to get rid of all of the above.
> 
> 	--yliu

The difference is that the virtio port may only receive packets and never call xmit, in which case a flag checked in the xmit function would never be acted on.

Thanks for the comments,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/virtio: make control queue thread-safe
  2017-11-24 11:03 ` [dpdk-dev] [PATCH 1/2] net/virtio: make control queue thread-safe Xiao Wang
  2017-11-24  5:38   ` Tiwei Bie
@ 2017-11-30  2:59   ` Stephen Hemminger
  2017-12-01  1:38     ` Wang, Xiao W
  2017-12-04 14:02   ` [dpdk-dev] [PATCH v2 0/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2 siblings, 1 reply; 112+ messages in thread
From: Stephen Hemminger @ 2017-11-30  2:59 UTC (permalink / raw)
  To: Xiao Wang; +Cc: dev, yliu

On Fri, 24 Nov 2017 03:03:59 -0800
Xiao Wang <xiao.w.wang@intel.com> wrote:

> @@ -184,8 +186,10 @@ struct rte_virtio_xstats_name_off {
>  		"vq->hw->cvq = %p vq = %p",
>  		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
>  
> -	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
> +	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1)) {

You don't need so many parentheses.
	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1)

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/virtio: make control queue thread-safe
  2017-11-30  2:59   ` Stephen Hemminger
@ 2017-12-01  1:38     ` Wang, Xiao W
  0 siblings, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2017-12-01  1:38 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, yliu



> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Thursday, November 30, 2017 10:59 AM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: dev@dpdk.org; yliu@fridaylinux.org
> Subject: Re: [dpdk-dev] [PATCH 1/2] net/virtio: make control queue thread-
> safe
> 
> On Fri, 24 Nov 2017 03:03:59 -0800
> Xiao Wang <xiao.w.wang@intel.com> wrote:
> 
> > @@ -184,8 +186,10 @@ struct rte_virtio_xstats_name_off {
> >  		"vq->hw->cvq = %p vq = %p",
> >  		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
> >
> > -	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
> > +	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1)) {
> 
> You don't need so many parentheses.
> 	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1)

Yes, it looks better. Will change it in v2.

Thanks,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE
  2017-12-04 14:02     ` [dpdk-dev] [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2017-12-04  8:46       ` Tiwei Bie
  2018-01-03  1:37         ` Wang, Xiao W
  2018-01-03  8:42         ` Wang, Xiao W
  2017-12-06 11:23       ` Tiwei Bie
  2018-01-04  7:41       ` [dpdk-dev] [PATCH v3 0/2] " Xiao Wang
  2 siblings, 2 replies; 112+ messages in thread
From: Tiwei Bie @ 2017-12-04  8:46 UTC (permalink / raw)
  To: Xiao Wang; +Cc: yliu, dev, stephen

On Mon, Dec 04, 2017 at 06:02:08AM -0800, Xiao Wang wrote:
> When live migration is done, for the backup VM, either the virtio
> frontend or the vhost backend needs to send out gratuitous RARP packet
> to announce its new network location.
> 

To support GUEST ANNOUNCE, do we just need to send a RARP packet?
Will it work in an IPv6-only network?

> This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
[...]
> +
> +static int
> +virtio_dev_pause(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +
> +	if (hw->started == 0)
> +		return -1;
> +	hw->started = 0;
> +	/*
> +	 * Prevent the worker thread from touching queues to avoid condition,

Typo. Avoid "contention"?

> +	 * 1 ms should be enough for the ongoing Tx function to finish.
> +	 */
> +	rte_delay_ms(1);
> +	return 0;
> +}
> +
> +static void
> +virtio_dev_resume(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +
> +	hw->started = 1;
> +}
> +
> +static void
> +generate_rarp(struct rte_eth_dev *dev)

You can give it a better name, e.g. virtio_notify_peers().

> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +	struct rte_mbuf *rarp_mbuf = NULL;
> +	struct virtnet_tx *txvq = dev->data->tx_queues[0];
> +	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
> +
> +	rarp_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
> +	if (rarp_mbuf == NULL) {
> +		PMD_DRV_LOG(ERR, "mbuf allocate failed");
> +		return;
> +	}
> +
> +	if (make_rarp_packet(rarp_mbuf, (struct ether_addr *)hw->mac_addr)) {
> +		rte_pktmbuf_free(rarp_mbuf);
> +		rarp_mbuf = NULL;
> +		return;
> +	}
> +
> +	/* If virtio port just stopped, no need to send RARP */
> +	if (virtio_dev_pause(dev) < 0)
> +		return;
> +
> +	virtio_inject_pkts(txvq, &rarp_mbuf, 1);
> +	/* Recover the stored hw status to let worker thread continue */
> +	virtio_dev_resume(dev);
> +}
> +
> +static void
> +virtnet_ack_link_announce(struct rte_eth_dev *dev)

Why use "virtnet_" prefix? I think "virtio_" would be better.

> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +	struct virtio_pmd_ctrl ctrl;
> +	int len;
> +
> +	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
> +	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
> +	len = 0;
> +
> +	virtio_send_command(hw->cvq, &ctrl, &len, 0);
> +}
> +
>  /*
> - * Process Virtio Config changed interrupt and call the callback
> - * if link state changed.
> + * Process virtio config changed interrupt. Call the callback
> + * if link state changed; generate gratuitous RARP packet if

Better to replace ";" with ","

> + * the status indicates an ANNOUNCE.
>   */
>  void
>  virtio_interrupt_handler(void *param)
> @@ -1274,6 +1391,12 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
>  						      NULL, NULL);
>  	}
>  
> +	if (isr & VIRTIO_NET_S_ANNOUNCE) {
> +		rte_spinlock_lock(&hw->sl);
> +		generate_rarp(dev);

Just curious. Do you need to make sure that the RARP packet
would be sent successfully?

> +		virtnet_ack_link_announce(dev);
> +		rte_spinlock_unlock(&hw->sl);
> +	}
>  }
[...]
> diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
> index 3c5ce66..3cd367e 100644
> --- a/drivers/net/virtio/virtio_pci.h
> +++ b/drivers/net/virtio/virtio_pci.h
> @@ -270,6 +270,7 @@ struct virtio_hw {
>  	struct virtio_pci_common_cfg *common_cfg;
>  	struct virtio_net_config *dev_cfg;
>  	void	    *virtio_user_dev;
> +	rte_spinlock_t sl;

Some detailed comments need to be added in the code to
document the usage of this lock.

>  
>  	struct virtqueue **vqs;
>  };
[...]
> diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
> index 2305d91..ed420e9 100644
> --- a/drivers/net/virtio/virtqueue.h
> +++ b/drivers/net/virtio/virtqueue.h
> @@ -158,6 +158,17 @@ struct virtio_net_ctrl_mac {
>  #define VIRTIO_NET_CTRL_VLAN_ADD 0
>  #define VIRTIO_NET_CTRL_VLAN_DEL 1
>  
> +/*
> + * Control link announce acknowledgement
> + *
> + * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
> + * driver has recevied the notification; device would clear the
> + * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
> + * this command.
> + */
> +#define VIRTIO_NET_CTRL_ANNOUNCE       3
> +#define VIRTIO_NET_CTRL_ANNOUNCE_ACK         0

You can just keep 3 and 0 in the same column.

Best regards,
Tiwei Bie

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v2 0/2] net/virtio: support GUEST ANNOUNCE
  2017-11-24 11:03 ` [dpdk-dev] [PATCH 1/2] net/virtio: make control queue thread-safe Xiao Wang
  2017-11-24  5:38   ` Tiwei Bie
  2017-11-30  2:59   ` Stephen Hemminger
@ 2017-12-04 14:02   ` Xiao Wang
  2017-12-04 14:02     ` [dpdk-dev] [PATCH v2 1/2] net/virtio: make control queue thread-safe Xiao Wang
  2017-12-04 14:02     ` [dpdk-dev] [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2 siblings, 2 replies; 112+ messages in thread
From: Xiao Wang @ 2017-12-04 14:02 UTC (permalink / raw)
  To: yliu; +Cc: dev, tiwei.bie, stephen, Xiao Wang

When live migration is finished, the backup VM needs to proactively announce
its new location. DPDK vhost has implemented VHOST_USER_PROTOCOL_F_RARP to
generate a RARP packet to switch in dequeue path. Another method is to let
the guest proactively send out RARP packet using VIRTIO_NET_F_GUEST_ANNOUNCE
feature.

This patch set enables this feature in the virtio PMD, so that a VM running the
virtio PMD can be migrated even when vhost does not support RARP generation.

v2:
- Use spaces instead of tabs between the code and comments.
- Remove unnecessary parentheses.
- Use rte_pktmbuf_mtod directly to get eth_hdr addr.
- Fix virtio_dev_pause return value check.

Xiao Wang (2):
  net/virtio: make control queue thread-safe
  net/virtio: support GUEST ANNOUNCE

 drivers/net/virtio/virtio_ethdev.c | 138 ++++++++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h |   4 ++
 drivers/net/virtio/virtio_pci.h    |   1 +
 drivers/net/virtio/virtio_rxtx.c   |  82 ++++++++++++++++++++++
 drivers/net/virtio/virtio_rxtx.h   |   1 +
 drivers/net/virtio/virtqueue.h     |  11 +++
 6 files changed, 234 insertions(+), 3 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v2 1/2] net/virtio: make control queue thread-safe
  2017-12-04 14:02   ` [dpdk-dev] [PATCH v2 0/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2017-12-04 14:02     ` Xiao Wang
  2017-12-04 14:02     ` [dpdk-dev] [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
  1 sibling, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2017-12-04 14:02 UTC (permalink / raw)
  To: yliu; +Cc: dev, tiwei.bie, stephen, Xiao Wang

The virtio_send_command function may be called from app's configuration
routine, but also from an interrupt handler called when live migration is
done on the backup side. So this patch makes control queue thread-safe
first.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
v2:
- Use spaces instead of tabs between the code and comments.
- Remove unnecessary parentheses.
---
 drivers/net/virtio/virtio_ethdev.c | 7 ++++++-
 drivers/net/virtio/virtio_rxtx.c   | 1 +
 drivers/net/virtio/virtio_rxtx.h   | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index e0328f6..ac73950 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -177,6 +177,8 @@ struct rte_virtio_xstats_name_off {
 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
 		return -1;
 	}
+
+	rte_spinlock_lock(&cvq->sl);
 	vq = cvq->vq;
 	head = vq->vq_desc_head_idx;
 
@@ -184,8 +186,10 @@ struct rte_virtio_xstats_name_off {
 		"vq->hw->cvq = %p vq = %p",
 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
 
-	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
+	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
+		rte_spinlock_unlock(&cvq->sl);
 		return -1;
+	}
 
 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
 		sizeof(struct virtio_pmd_ctrl));
@@ -261,6 +265,7 @@ struct rte_virtio_xstats_name_off {
 
 	result = cvq->virtio_net_hdr_mz->addr;
 
+	rte_spinlock_unlock(&cvq->sl);
 	return result->status;
 }
 
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 390c137..6a24fde 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -407,6 +407,7 @@
 	struct virtio_hw *hw = dev->data->dev_private;
 
 	if (hw->cvq && hw->cvq->vq) {
+		rte_spinlock_init(&hw->cvq->sl);
 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
 	}
 }
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 54f1e84..71b5798 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -84,6 +84,7 @@ struct virtnet_ctl {
 	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
 	uint16_t port_id;               /**< Device port identifier. */
 	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring. */
+	rte_spinlock_t sl;              /**< spinlock for control queue. */
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE
  2017-12-04 14:02   ` [dpdk-dev] [PATCH v2 0/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2017-12-04 14:02     ` [dpdk-dev] [PATCH v2 1/2] net/virtio: make control queue thread-safe Xiao Wang
@ 2017-12-04 14:02     ` Xiao Wang
  2017-12-04  8:46       ` Tiwei Bie
                         ` (2 more replies)
  1 sibling, 3 replies; 112+ messages in thread
From: Xiao Wang @ 2017-12-04 14:02 UTC (permalink / raw)
  To: yliu; +Cc: dev, tiwei.bie, stephen, Xiao Wang

When live migration is done, for the backup VM, either the virtio
frontend or the vhost backend needs to send out gratuitous RARP packet
to announce its new network location.

This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
migration scenario where the vhost backend doesn't have the ability to
generate RARP packet.

Brief introduction of the work flow:
1. QEMU finishes live migration, pokes the backup VM with an interrupt.
2. Virtio interrupt handler reads out the interrupt status value, and
   realizes it needs to send out RARP packet to announce its location.
3. Pause device to stop worker thread touching the queues.
4. Inject a RARP packet into a Tx Queue.
5. Ack the interrupt via control queue.
6. Resume device to continue packet processing.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
v2:
- Use rte_pktmbuf_mtod directly to get eth_hdr addr.
- Fix virtio_dev_pause return value check.
---
 drivers/net/virtio/virtio_ethdev.c | 131 ++++++++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h |   4 ++
 drivers/net/virtio/virtio_pci.h    |   1 +
 drivers/net/virtio/virtio_rxtx.c   |  81 +++++++++++++++++++++++
 drivers/net/virtio/virtqueue.h     |  11 ++++
 5 files changed, 226 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index ac73950..4c937c6 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -48,6 +48,8 @@
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_arp.h>
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_cpuflags.h>
@@ -55,6 +57,7 @@
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_dev.h>
+#include <rte_cycles.h>
 
 #include "virtio_ethdev.h"
 #include "virtio_pci.h"
@@ -106,6 +109,13 @@ static int virtio_dev_queue_stats_mapping_set(
 	uint8_t stat_idx,
 	uint8_t is_rx);
 
+static int make_rarp_packet(struct rte_mbuf *rarp_mbuf,
+		const struct ether_addr *mac);
+static int virtio_dev_pause(struct rte_eth_dev *dev);
+static void virtio_dev_resume(struct rte_eth_dev *dev);
+static void generate_rarp(struct rte_eth_dev *dev);
+static void virtnet_ack_link_announce(struct rte_eth_dev *dev);
+
 /*
  * The set of PCI devices this driver supports
  */
@@ -1249,9 +1259,116 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
 	return 0;
 }
 
+#define RARP_PKT_SIZE	64
+
+static int
+make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
+{
+	struct ether_hdr *eth_hdr;
+	struct arp_hdr  *rarp;
+
+	if (rarp_mbuf->buf_len < RARP_PKT_SIZE) {
+		PMD_DRV_LOG(ERR, "mbuf size too small %u (< %d)",
+				rarp_mbuf->buf_len, RARP_PKT_SIZE);
+		return -1;
+	}
+
+	/* Ethernet header. */
+	eth_hdr = rte_pktmbuf_mtod(rarp_mbuf, struct ether_hdr *);
+	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
+	ether_addr_copy(mac, &eth_hdr->s_addr);
+	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
+
+	/* RARP header. */
+	rarp = (struct arp_hdr *)(eth_hdr + 1);
+	rarp->arp_hrd = htons(ARP_HRD_ETHER);
+	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
+	rarp->arp_hln = ETHER_ADDR_LEN;
+	rarp->arp_pln = 4;
+	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
+
+	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
+	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
+	memset(&rarp->arp_data.arp_sip, 0x00, 4);
+	memset(&rarp->arp_data.arp_tip, 0x00, 4);
+
+	rarp_mbuf->data_len = RARP_PKT_SIZE;
+	rarp_mbuf->pkt_len = RARP_PKT_SIZE;
+
+	return 0;
+}
+
+static int
+virtio_dev_pause(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	if (hw->started == 0)
+		return -1;
+	hw->started = 0;
+	/*
+	 * Prevent the worker thread from touching queues to avoid condition,
+	 * 1 ms should be enough for the ongoing Tx function to finish.
+	 */
+	rte_delay_ms(1);
+	return 0;
+}
+
+static void
+virtio_dev_resume(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	hw->started = 1;
+}
+
+static void
+generate_rarp(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct rte_mbuf *rarp_mbuf = NULL;
+	struct virtnet_tx *txvq = dev->data->tx_queues[0];
+	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
+
+	rarp_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
+	if (rarp_mbuf == NULL) {
+		PMD_DRV_LOG(ERR, "mbuf allocate failed");
+		return;
+	}
+
+	if (make_rarp_packet(rarp_mbuf, (struct ether_addr *)hw->mac_addr)) {
+		rte_pktmbuf_free(rarp_mbuf);
+		rarp_mbuf = NULL;
+		return;
+	}
+
+	/* If virtio port just stopped, no need to send RARP */
+	if (virtio_dev_pause(dev) < 0)
+		return;
+
+	virtio_inject_pkts(txvq, &rarp_mbuf, 1);
+	/* Recover the stored hw status to let worker thread continue */
+	virtio_dev_resume(dev);
+}
+
+static void
+virtnet_ack_link_announce(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+	int len;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
+	len = 0;
+
+	virtio_send_command(hw->cvq, &ctrl, &len, 0);
+}
+
 /*
- * Process Virtio Config changed interrupt and call the callback
- * if link state changed.
+ * Process virtio config changed interrupt. Call the callback
+ * if link state changed; generate gratuitous RARP packet if
+ * the status indicates an ANNOUNCE.
  */
 void
 virtio_interrupt_handler(void *param)
@@ -1274,6 +1391,12 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
 						      NULL, NULL);
 	}
 
+	if (isr & VIRTIO_NET_S_ANNOUNCE) {
+		rte_spinlock_lock(&hw->sl);
+		generate_rarp(dev);
+		virtnet_ack_link_announce(dev);
+		rte_spinlock_unlock(&hw->sl);
+	}
 }
 
 /* set rx and tx handlers according to what is supported */
@@ -1786,6 +1909,8 @@ static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev)
 			return -EBUSY;
 		}
 
+	rte_spinlock_init(&hw->sl);
+
 	hw->use_simple_rx = 1;
 	hw->use_simple_tx = 1;
 
@@ -1952,12 +2077,14 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
+	rte_spinlock_lock(&hw->sl);
 	if (intr_conf->lsc || intr_conf->rxq)
 		virtio_intr_disable(dev);
 
 	hw->started = 0;
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
+	rte_spinlock_unlock(&hw->sl);
 }
 
 static int
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 2039bc5..24271cb 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -67,6 +67,7 @@
 	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
 	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
 	 1u << VIRTIO_NET_F_MTU	| \
+	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE  | \
 	 1u << VIRTIO_RING_F_INDIRECT_DESC |    \
 	 1ULL << VIRTIO_F_VERSION_1       |	\
 	 1ULL << VIRTIO_F_IOMMU_PLATFORM)
@@ -111,6 +112,9 @@ uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
 
+uint16_t virtio_inject_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts);
+
 uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts);
 
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 3c5ce66..3cd367e 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -270,6 +270,7 @@ struct virtio_hw {
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	void	    *virtio_user_dev;
+	rte_spinlock_t sl;
 
 	struct virtqueue **vqs;
 };
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 6a24fde..7313bdd 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -1100,3 +1100,84 @@
 
 	return nb_tx;
 }
+
+uint16_t
+virtio_inject_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct virtnet_tx *txvq = tx_queue;
+	struct virtqueue *vq = txvq->vq;
+	struct virtio_hw *hw = vq->hw;
+	uint16_t hdr_size = hw->vtnet_hdr_size;
+	uint16_t nb_used, nb_tx = 0;
+
+	if (unlikely(nb_pkts < 1))
+		return nb_pkts;
+
+	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
+	nb_used = VIRTQUEUE_NUSED(vq);
+
+	virtio_rmb();
+	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
+		virtio_xmit_cleanup(vq, nb_used);
+
+	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+		struct rte_mbuf *txm = tx_pkts[nb_tx];
+		int can_push = 0, use_indirect = 0, slots, need;
+
+		/* optimize ring usage */
+		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
+					vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
+			rte_mbuf_refcnt_read(txm) == 1 &&
+			RTE_MBUF_DIRECT(txm) &&
+			txm->nb_segs == 1 &&
+			rte_pktmbuf_headroom(txm) >= hdr_size &&
+			rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
+				__alignof__(struct virtio_net_hdr_mrg_rxbuf)))
+			can_push = 1;
+		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
+			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
+			use_indirect = 1;
+
+		/* How many main ring entries are needed to this Tx?
+		 * any_layout => number of segments
+		 * indirect   => 1
+		 * default    => number of segments + 1
+		 */
+		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
+		need = slots - vq->vq_free_cnt;
+
+		/* Positive value indicates it need free vring descriptors */
+		if (unlikely(need > 0)) {
+			nb_used = VIRTQUEUE_NUSED(vq);
+			virtio_rmb();
+			need = RTE_MIN(need, (int)nb_used);
+
+			virtio_xmit_cleanup(vq, need);
+			need = slots - vq->vq_free_cnt;
+			if (unlikely(need > 0)) {
+				PMD_TX_LOG(ERR,
+						"No free tx descriptors to transmit");
+				break;
+			}
+		}
+
+		/* Enqueue Packet buffers */
+		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);
+
+		txvq->stats.bytes += txm->pkt_len;
+		virtio_update_packet_stats(&txvq->stats, txm);
+	}
+
+	txvq->stats.packets += nb_tx;
+
+	if (likely(nb_tx)) {
+		vq_update_avail_idx(vq);
+
+		if (unlikely(virtqueue_kick_prepare(vq))) {
+			virtqueue_notify(vq);
+			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
+		}
+	}
+
+	return nb_tx;
+}
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 2305d91..ed420e9 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -158,6 +158,17 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 #define VIRTIO_NET_CTRL_VLAN_DEL 1
 
+/*
+ * Control link announce acknowledgement
+ *
+ * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
+ * driver has received the notification; device would clear the
+ * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
+ * this command.
+ */
+#define VIRTIO_NET_CTRL_ANNOUNCE       3
+#define VIRTIO_NET_CTRL_ANNOUNCE_ACK         0
+
 struct virtio_net_ctrl_hdr {
 	uint8_t class;
 	uint8_t cmd;
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net/virtio: support GUEST ANNOUNCE
  2017-11-30  2:41     ` Wang, Xiao W
@ 2017-12-05 14:26       ` Yuanhan Liu
  2018-01-03  1:43         ` Wang, Xiao W
  0 siblings, 1 reply; 112+ messages in thread
From: Yuanhan Liu @ 2017-12-05 14:26 UTC (permalink / raw)
  To: Wang, Xiao W; +Cc: dev, Bie, Tiwei

On Thu, Nov 30, 2017 at 02:41:12AM +0000, Wang, Xiao W wrote:
> 
> 
> > -----Original Message-----
> > From: Yuanhan Liu [mailto:yliu@fridaylinux.org]
> > Sent: Monday, November 27, 2017 8:49 PM
> > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > Cc: dev@dpdk.org
> > Subject: Re: [PATCH 2/2] net/virtio: support GUEST ANNOUNCE
> > 
> > On Fri, Nov 24, 2017 at 03:04:00AM -0800, Xiao Wang wrote:
> > > When live migration is done, for the backup VM, either the virtio
> > > frontend or the vhost backend needs to send out gratuitous RARP packet
> > > to announce its new network location.
> > >
> > > This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support
> > live
> > > migration scenario where the vhost backend doesn't have the ability to
> > > generate RARP packet.
> > 
> > Yes, it's a feature good to have.
> > 
> > > +static int
> > > +virtio_dev_pause(struct rte_eth_dev *dev)
> > > +{
> > > +	struct virtio_hw *hw = dev->data->dev_private;
> > > +
> > > +	if (hw->started == 0)
> > > +		return -1;
> > > +	hw->started = 0;
> > > +	/*
> > > +	 * Prevent the worker thread from touching queues to avoid condition,
> > > +	 * 1 ms should be enough for the ongoing Tx function to finish.
> > > +	 */
> > > +	rte_delay_ms(1);
> > > +	return 0;
> > > +}
> > > +
> > > +static void
> > > +virtio_dev_resume(struct rte_eth_dev *dev)
> > > +{
> > > +	struct virtio_hw *hw = dev->data->dev_private;
> > > +
> > > +	hw->started = 1;
> > > +}
> > 
> > However, the implementation (stop first, pause for 1ms, duplicate another
> > Tx function, resume) doesn't seem elegant.
> > 
> > You probably could try something like DPDK vhost does:
> > 
> > - set a flag when S_ANNOUCE is received
> > - inject a pkt when such flag is set in the xmit function
> > 
> > You then should be able to get rid of all of above stuffs.
> > 
> > 	--yliu
> 
> The difference is that the virtio port may just receive packet, without xmit.

Thanks, I missed that.

However, you really should not add a duplicate function. It adds more
maintenance effort. I think you could probably just invoke the tx_pkt_burst
callback directly. You have stopped the device after all. Why is it
necessary to duplicate it?

	--yliu

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE
  2017-12-04 14:02     ` [dpdk-dev] [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2017-12-04  8:46       ` Tiwei Bie
@ 2017-12-06 11:23       ` Tiwei Bie
  2017-12-06 14:22         ` Yuanhan Liu
  2018-01-03  1:41         ` Wang, Xiao W
  2018-01-04  7:41       ` [dpdk-dev] [PATCH v3 0/2] " Xiao Wang
  2 siblings, 2 replies; 112+ messages in thread
From: Tiwei Bie @ 2017-12-06 11:23 UTC (permalink / raw)
  To: Xiao Wang; +Cc: yliu, dev, stephen

On Mon, Dec 04, 2017 at 06:02:08AM -0800, Xiao Wang wrote:
[...]
> diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> index 6a24fde..7313bdd 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -1100,3 +1100,84 @@
>  
>  	return nb_tx;
>  }
> +
> +uint16_t
> +virtio_inject_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> +{
> +	struct virtnet_tx *txvq = tx_queue;
> +	struct virtqueue *vq = txvq->vq;
> +	struct virtio_hw *hw = vq->hw;
> +	uint16_t hdr_size = hw->vtnet_hdr_size;
> +	uint16_t nb_used, nb_tx = 0;
> +
> +	if (unlikely(nb_pkts < 1))
> +		return nb_pkts;
> +
> +	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
> +	nb_used = VIRTQUEUE_NUSED(vq);
> +
> +	virtio_rmb();
> +	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
> +		virtio_xmit_cleanup(vq, nb_used);
> +
> +	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
> +		struct rte_mbuf *txm = tx_pkts[nb_tx];
> +		int can_push = 0, use_indirect = 0, slots, need;
> +
> +		/* optimize ring usage */
> +		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
> +					vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
> +			rte_mbuf_refcnt_read(txm) == 1 &&
> +			RTE_MBUF_DIRECT(txm) &&
> +			txm->nb_segs == 1 &&
> +			rte_pktmbuf_headroom(txm) >= hdr_size &&
> +			rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
> +				__alignof__(struct virtio_net_hdr_mrg_rxbuf)))
> +			can_push = 1;
> +		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
> +			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
> +			use_indirect = 1;
> +
> +		/* How many main ring entries are needed to this Tx?
> +		 * any_layout => number of segments
> +		 * indirect   => 1
> +		 * default    => number of segments + 1
> +		 */
> +		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
> +		need = slots - vq->vq_free_cnt;
> +
> +		/* Positive value indicates it need free vring descriptors */
> +		if (unlikely(need > 0)) {
> +			nb_used = VIRTQUEUE_NUSED(vq);
> +			virtio_rmb();
> +			need = RTE_MIN(need, (int)nb_used);
> +
> +			virtio_xmit_cleanup(vq, need);
> +			need = slots - vq->vq_free_cnt;
> +			if (unlikely(need > 0)) {
> +				PMD_TX_LOG(ERR,
> +						"No free tx descriptors to transmit");
> +				break;
> +			}
> +		}
> +
> +		/* Enqueue Packet buffers */
> +		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);
> +
> +		txvq->stats.bytes += txm->pkt_len;
> +		virtio_update_packet_stats(&txvq->stats, txm);
> +	}
> +
> +	txvq->stats.packets += nb_tx;
> +
> +	if (likely(nb_tx)) {
> +		vq_update_avail_idx(vq);
> +
> +		if (unlikely(virtqueue_kick_prepare(vq))) {
> +			virtqueue_notify(vq);
> +			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
> +		}
> +	}
> +
> +	return nb_tx;
> +}

Simple Tx has some special assumptions and setups of the txq.
Basically the current implementation of virtio_inject_pkts()
is a mirror of virtio_xmit_pkts(). So when simple Tx function
is chosen, calling virtio_inject_pkts() could cause problems.

Best regards,
Tiwei Bie

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE
  2017-12-06 11:23       ` Tiwei Bie
@ 2017-12-06 14:22         ` Yuanhan Liu
  2018-01-03  1:41         ` Wang, Xiao W
  1 sibling, 0 replies; 112+ messages in thread
From: Yuanhan Liu @ 2017-12-06 14:22 UTC (permalink / raw)
  To: Tiwei Bie; +Cc: Xiao Wang, dev, stephen

On Wed, Dec 06, 2017 at 07:23:11PM +0800, Tiwei Bie wrote:
> On Mon, Dec 04, 2017 at 06:02:08AM -0800, Xiao Wang wrote:
> [...]
> > diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> > index 6a24fde..7313bdd 100644
> > --- a/drivers/net/virtio/virtio_rxtx.c
> > +++ b/drivers/net/virtio/virtio_rxtx.c
> > @@ -1100,3 +1100,84 @@
> >  
> >  	return nb_tx;
> >  }
> > +
> > +uint16_t
> > +virtio_inject_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> > +{
> > +	struct virtnet_tx *txvq = tx_queue;
> > +	struct virtqueue *vq = txvq->vq;
> > +	struct virtio_hw *hw = vq->hw;
> > +	uint16_t hdr_size = hw->vtnet_hdr_size;
> > +	uint16_t nb_used, nb_tx = 0;
> > +
> > +	if (unlikely(nb_pkts < 1))
> > +		return nb_pkts;
> > +
> > +	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
> > +	nb_used = VIRTQUEUE_NUSED(vq);
> > +
> > +	virtio_rmb();
> > +	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
> > +		virtio_xmit_cleanup(vq, nb_used);
> > +
> > +	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
> > +		struct rte_mbuf *txm = tx_pkts[nb_tx];
> > +		int can_push = 0, use_indirect = 0, slots, need;
> > +
> > +		/* optimize ring usage */
> > +		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
> > +					vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
> > +			rte_mbuf_refcnt_read(txm) == 1 &&
> > +			RTE_MBUF_DIRECT(txm) &&
> > +			txm->nb_segs == 1 &&
> > +			rte_pktmbuf_headroom(txm) >= hdr_size &&
> > +			rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
> > +				__alignof__(struct virtio_net_hdr_mrg_rxbuf)))
> > +			can_push = 1;
> > +		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
> > +			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
> > +			use_indirect = 1;
> > +
> > +		/* How many main ring entries are needed to this Tx?
> > +		 * any_layout => number of segments
> > +		 * indirect   => 1
> > +		 * default    => number of segments + 1
> > +		 */
> > +		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
> > +		need = slots - vq->vq_free_cnt;
> > +
> > +		/* Positive value indicates it need free vring descriptors */
> > +		if (unlikely(need > 0)) {
> > +			nb_used = VIRTQUEUE_NUSED(vq);
> > +			virtio_rmb();
> > +			need = RTE_MIN(need, (int)nb_used);
> > +
> > +			virtio_xmit_cleanup(vq, need);
> > +			need = slots - vq->vq_free_cnt;
> > +			if (unlikely(need > 0)) {
> > +				PMD_TX_LOG(ERR,
> > +						"No free tx descriptors to transmit");
> > +				break;
> > +			}
> > +		}
> > +
> > +		/* Enqueue Packet buffers */
> > +		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);
> > +
> > +		txvq->stats.bytes += txm->pkt_len;
> > +		virtio_update_packet_stats(&txvq->stats, txm);
> > +	}
> > +
> > +	txvq->stats.packets += nb_tx;
> > +
> > +	if (likely(nb_tx)) {
> > +		vq_update_avail_idx(vq);
> > +
> > +		if (unlikely(virtqueue_kick_prepare(vq))) {
> > +			virtqueue_notify(vq);
> > +			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
> > +		}
> > +	}
> > +
> > +	return nb_tx;
> > +}
> 
> Simple Tx has some special assumptions and setups of the txq.
> Basically the current implementation of virtio_inject_pkts()
> is a mirror of virtio_xmit_pkts(). So when simple Tx function
> is chosen, calling virtio_inject_pkts() could cause problems.

That's why I suggested to invoke the tx_pkt_burst callback directly.

	--yliu

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE
  2017-12-04  8:46       ` Tiwei Bie
@ 2018-01-03  1:37         ` Wang, Xiao W
  2018-01-03  8:42         ` Wang, Xiao W
  1 sibling, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-03  1:37 UTC (permalink / raw)
  To: Bie, Tiwei; +Cc: yliu, dev, stephen

Hi,

> -----Original Message-----
> From: Bie, Tiwei
> Sent: Monday, December 4, 2017 4:47 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: yliu@fridaylinux.org; dev@dpdk.org; stephen@networkplumber.org
> Subject: Re: [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE
> 
> On Mon, Dec 04, 2017 at 06:02:08AM -0800, Xiao Wang wrote:
> > When live migration is done, for the backup VM, either the virtio
> > frontend or the vhost backend needs to send out gratuitous RARP packet
> > to announce its new network location.
> >
> 
> To support GUEST ANNOUNCE, do we just need to send a RARP packet?
> Will it work in an IPv6-only network?

Will try to send out another one for IPv6 in next version.

> 
> > This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support
> live
> [...]
> > +
> > +static int
> > +virtio_dev_pause(struct rte_eth_dev *dev)
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +
> > +	if (hw->started == 0)
> > +		return -1;
> > +	hw->started = 0;
> > +	/*
> > +	 * Prevent the worker thread from touching queues to avoid condition,
> 
> Typo. Avoid "contention"?

Will fix it in the next version.

> 
> > +	 * 1 ms should be enough for the ongoing Tx function to finish.
> > +	 */
> > +	rte_delay_ms(1);
> > +	return 0;
> > +}
> > +
> > +static void
> > +virtio_dev_resume(struct rte_eth_dev *dev)
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +
> > +	hw->started = 1;
> > +}
> > +
> > +static void
> > +generate_rarp(struct rte_eth_dev *dev)
> 
> You can give it a better name, e.g. virtio_notify_peers().

Good suggestion.

> 
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +	struct rte_mbuf *rarp_mbuf = NULL;
> > +	struct virtnet_tx *txvq = dev->data->tx_queues[0];
> > +	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
> > +
> > +	rarp_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
> > +	if (rarp_mbuf == NULL) {
> > +		PMD_DRV_LOG(ERR, "mbuf allocate failed");
> > +		return;
> > +	}
> > +
> > +	if (make_rarp_packet(rarp_mbuf, (struct ether_addr *)hw->mac_addr))
> {
> > +		rte_pktmbuf_free(rarp_mbuf);
> > +		rarp_mbuf = NULL;
> > +		return;
> > +	}
> > +
> > +	/* If virtio port just stopped, no need to send RARP */
> > +	if (virtio_dev_pause(dev) < 0)
> > +		return;
> > +
> > +	virtio_inject_pkts(txvq, &rarp_mbuf, 1);
> > +	/* Recover the stored hw status to let worker thread continue */
> > +	virtio_dev_resume(dev);
> > +}
> > +
> > +static void
> > +virtnet_ack_link_announce(struct rte_eth_dev *dev)
> 
> Why use "virtnet_" prefix? I think "virtio_" would be better.

Yes, that would be similar to other function names in this file.

> 
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +	struct virtio_pmd_ctrl ctrl;
> > +	int len;
> > +
> > +	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
> > +	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
> > +	len = 0;
> > +
> > +	virtio_send_command(hw->cvq, &ctrl, &len, 0);
> > +}
> > +
> >  /*
> > - * Process Virtio Config changed interrupt and call the callback
> > - * if link state changed.
> > + * Process virtio config changed interrupt. Call the callback
> > + * if link state changed; generate gratuitous RARP packet if
> 
> Better to replace ";" with ","

OK. Will update in next version.

> 
> > + * the status indicates an ANNOUNCE.
> >   */
> >  void
> >  virtio_interrupt_handler(void *param)
> > @@ -1274,6 +1391,12 @@ static int virtio_dev_xstats_get_names(struct
> rte_eth_dev *dev,
> >  						      NULL, NULL);
> >  	}
> >
> > +	if (isr & VIRTIO_NET_S_ANNOUNCE) {
> > +		rte_spinlock_lock(&hw->sl);
> > +		generate_rarp(dev);
> 
> Just curious. Do you need to make sure that the RARP packet
> would be sent successfully?

The pause will make the ring get drained.

> 
> > +		virtnet_ack_link_announce(dev);
> > +		rte_spinlock_unlock(&hw->sl);
> > +	}
> >  }
> [...]
> > diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
> > index 3c5ce66..3cd367e 100644
> > --- a/drivers/net/virtio/virtio_pci.h
> > +++ b/drivers/net/virtio/virtio_pci.h
> > @@ -270,6 +270,7 @@ struct virtio_hw {
> >  	struct virtio_pci_common_cfg *common_cfg;
> >  	struct virtio_net_config *dev_cfg;
> >  	void	    *virtio_user_dev;
> > +	rte_spinlock_t sl;
> 
> Some detailed comments need to be added in the code to
> document the usage of this lock.

OK. Will add it in v3.

> 
> >
> >  	struct virtqueue **vqs;
> >  };
> [...]
> > diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
> > index 2305d91..ed420e9 100644
> > --- a/drivers/net/virtio/virtqueue.h
> > +++ b/drivers/net/virtio/virtqueue.h
> > @@ -158,6 +158,17 @@ struct virtio_net_ctrl_mac {
> >  #define VIRTIO_NET_CTRL_VLAN_ADD 0
> >  #define VIRTIO_NET_CTRL_VLAN_DEL 1
> >
> > +/*
> > + * Control link announce acknowledgement
> > + *
> > + * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate
> that
> > + * driver has recevied the notification; device would clear the
> > + * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
> > + * this command.
> > + */
> > +#define VIRTIO_NET_CTRL_ANNOUNCE       3
> > +#define VIRTIO_NET_CTRL_ANNOUNCE_ACK         0
> 
> You can just keep 3 and 0 in the same column.

Will make it in v3.

Thanks for the comments,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE
  2017-12-06 11:23       ` Tiwei Bie
  2017-12-06 14:22         ` Yuanhan Liu
@ 2018-01-03  1:41         ` Wang, Xiao W
  1 sibling, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-03  1:41 UTC (permalink / raw)
  To: Bie, Tiwei; +Cc: yliu, dev, stephen



> -----Original Message-----
> From: Bie, Tiwei
> Sent: Wednesday, December 6, 2017 7:23 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: yliu@fridaylinux.org; dev@dpdk.org; stephen@networkplumber.org
> Subject: Re: [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE
> 
> On Mon, Dec 04, 2017 at 06:02:08AM -0800, Xiao Wang wrote:
> [...]
> > diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> > index 6a24fde..7313bdd 100644
> > --- a/drivers/net/virtio/virtio_rxtx.c
> > +++ b/drivers/net/virtio/virtio_rxtx.c
> > @@ -1100,3 +1100,84 @@
> >
> >  	return nb_tx;
> >  }
> > +
> > +uint16_t
> > +virtio_inject_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t
> nb_pkts)
> > +{
> > +	struct virtnet_tx *txvq = tx_queue;
> > +	struct virtqueue *vq = txvq->vq;
> > +	struct virtio_hw *hw = vq->hw;
> > +	uint16_t hdr_size = hw->vtnet_hdr_size;
> > +	uint16_t nb_used, nb_tx = 0;
> > +
> > +	if (unlikely(nb_pkts < 1))
> > +		return nb_pkts;
> > +
> > +	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
> > +	nb_used = VIRTQUEUE_NUSED(vq);
> > +
> > +	virtio_rmb();
> > +	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
> > +		virtio_xmit_cleanup(vq, nb_used);
> > +
> > +	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
> > +		struct rte_mbuf *txm = tx_pkts[nb_tx];
> > +		int can_push = 0, use_indirect = 0, slots, need;
> > +
> > +		/* optimize ring usage */
> > +		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
> > +					vtpci_with_feature(hw,
> VIRTIO_F_VERSION_1)) &&
> > +			rte_mbuf_refcnt_read(txm) == 1 &&
> > +			RTE_MBUF_DIRECT(txm) &&
> > +			txm->nb_segs == 1 &&
> > +			rte_pktmbuf_headroom(txm) >= hdr_size &&
> > +			rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
> > +				__alignof__(struct virtio_net_hdr_mrg_rxbuf)))
> > +			can_push = 1;
> > +		else if (vtpci_with_feature(hw,
> VIRTIO_RING_F_INDIRECT_DESC) &&
> > +			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
> > +			use_indirect = 1;
> > +
> > +		/* How many main ring entries are needed to this Tx?
> > +		 * any_layout => number of segments
> > +		 * indirect   => 1
> > +		 * default    => number of segments + 1
> > +		 */
> > +		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
> > +		need = slots - vq->vq_free_cnt;
> > +
> > +		/* Positive value indicates it need free vring descriptors */
> > +		if (unlikely(need > 0)) {
> > +			nb_used = VIRTQUEUE_NUSED(vq);
> > +			virtio_rmb();
> > +			need = RTE_MIN(need, (int)nb_used);
> > +
> > +			virtio_xmit_cleanup(vq, need);
> > +			need = slots - vq->vq_free_cnt;
> > +			if (unlikely(need > 0)) {
> > +				PMD_TX_LOG(ERR,
> > +						"No free tx descriptors to
> transmit");
> > +				break;
> > +			}
> > +		}
> > +
> > +		/* Enqueue Packet buffers */
> > +		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
> can_push);
> > +
> > +		txvq->stats.bytes += txm->pkt_len;
> > +		virtio_update_packet_stats(&txvq->stats, txm);
> > +	}
> > +
> > +	txvq->stats.packets += nb_tx;
> > +
> > +	if (likely(nb_tx)) {
> > +		vq_update_avail_idx(vq);
> > +
> > +		if (unlikely(virtqueue_kick_prepare(vq))) {
> > +			virtqueue_notify(vq);
> > +			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
> > +		}
> > +	}
> > +
> > +	return nb_tx;
> > +}
> 
> Simple Tx has some special assumptions and setups of the txq.
> Basically the current implementation of virtio_inject_pkts()
> is a mirror of virtio_xmit_pkts(). So when simple Tx function
> is chosen, calling virtio_inject_pkts() could cause problems.

I will have a static mbuf ** pointer for rarp packets in next version, which can also avoid code duplication.

BRs,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net/virtio: support GUEST ANNOUNCE
  2017-12-05 14:26       ` Yuanhan Liu
@ 2018-01-03  1:43         ` Wang, Xiao W
  0 siblings, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-03  1:43 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, Bie, Tiwei



> -----Original Message-----
> From: Yuanhan Liu [mailto:yliu@fridaylinux.org]
> Sent: Tuesday, December 5, 2017 10:26 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: dev@dpdk.org; Bie, Tiwei <tiwei.bie@intel.com>
> Subject: Re: [PATCH 2/2] net/virtio: support GUEST ANNOUNCE
> 
> On Thu, Nov 30, 2017 at 02:41:12AM +0000, Wang, Xiao W wrote:
> >
> >
> > > -----Original Message-----
> > > From: Yuanhan Liu [mailto:yliu@fridaylinux.org]
> > > Sent: Monday, November 27, 2017 8:49 PM
> > > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > > Cc: dev@dpdk.org
> > > Subject: Re: [PATCH 2/2] net/virtio: support GUEST ANNOUNCE
> > >
> > > On Fri, Nov 24, 2017 at 03:04:00AM -0800, Xiao Wang wrote:
> > > > When live migration is done, for the backup VM, either the virtio
> > > > frontend or the vhost backend needs to send out gratuitous RARP packet
> > > > to announce its new network location.
> > > >
> > > > This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support
> > > live
> > > > migration scenario where the vhost backend doesn't have the ability to
> > > > generate RARP packet.
> > >
> > > Yes, it's a feature good to have.
> > >
> > > > +static int
> > > > +virtio_dev_pause(struct rte_eth_dev *dev)
> > > > +{
> > > > +	struct virtio_hw *hw = dev->data->dev_private;
> > > > +
> > > > +	if (hw->started == 0)
> > > > +		return -1;
> > > > +	hw->started = 0;
> > > > +	/*
> > > > +	 * Prevent the worker thread from touching queues to avoid condition,
> > > > +	 * 1 ms should be enough for the ongoing Tx function to finish.
> > > > +	 */
> > > > +	rte_delay_ms(1);
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +static void
> > > > +virtio_dev_resume(struct rte_eth_dev *dev)
> > > > +{
> > > > +	struct virtio_hw *hw = dev->data->dev_private;
> > > > +
> > > > +	hw->started = 1;
> > > > +}
> > >
> > > However, the implementation (stop first, pause for 1ms, duplicate another
> > > Tx function, resume) doesn't seem elegant.
> > >
> > > You probably could try something like DPDK vhost does:
> > >
> > > - set a flag when S_ANNOUCE is received
> > > - inject a pkt when such flag is set in the xmit function
> > >
> > > You then should be able to get rid of all of above stuffs.
> > >
> > > 	--yliu
> >
> > The difference is that the virtio port may just receive packet, without xmit.
> 
> Thanks, I missed that.
> 
> However, you really should not add a duplicate function. It adds more
> maintain effort. I think you probably could just invoke the tx_pkt_burst
> callback directly. You have stopped the device after all. What's the
> necessary to duplicate it?
> 
> 	--yliu

Just invoking the tx_pkt_burst callback would be ideal.
I will have a static mbuf ** pointer for rarp packets in next version, then the tx_pkt_burst function will let rarp packet pass even if the queue is stopped.
This can avoid code duplication.

BRs,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE
  2017-12-04  8:46       ` Tiwei Bie
  2018-01-03  1:37         ` Wang, Xiao W
@ 2018-01-03  8:42         ` Wang, Xiao W
  1 sibling, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-03  8:42 UTC (permalink / raw)
  To: Bie, Tiwei; +Cc: yliu, dev, stephen



> -----Original Message-----
> From: Wang, Xiao W
> Sent: Wednesday, January 3, 2018 9:37 AM
> To: Bie, Tiwei <tiwei.bie@intel.com>
> Cc: yliu@fridaylinux.org; dev@dpdk.org; stephen@networkplumber.org
> Subject: RE: [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE
> 
> Hi,
> 
> > -----Original Message-----
> > From: Bie, Tiwei
> > Sent: Monday, December 4, 2017 4:47 PM
> > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > Cc: yliu@fridaylinux.org; dev@dpdk.org; stephen@networkplumber.org
> > Subject: Re: [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE
> >
> > On Mon, Dec 04, 2017 at 06:02:08AM -0800, Xiao Wang wrote:
> > > When live migration is done, for the backup VM, either the virtio
> > > frontend or the vhost backend needs to send out gratuitous RARP packet
> > > to announce its new network location.
> > >
> >
> > To support GUEST ANNOUNCE, do we just need to send a RARP packet?
> > Will it work in an IPv6-only network?
> 
> Will try to send out another one for IPv6 in next version.

I checked the kernel's implementation for IPv6: it is tied to the netdev configuration, and DPDK has no such concept.

BRs,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v3 2/2] net/virtio: support GUEST ANNOUNCE
  2018-01-04  7:41         ` [dpdk-dev] [PATCH v3 2/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2018-01-04  2:51           ` Tiwei Bie
  2018-01-04  7:11             ` Wang, Xiao W
  2018-01-04 15:59           ` [dpdk-dev] [PATCH v4 0/3] " Xiao Wang
  1 sibling, 1 reply; 112+ messages in thread
From: Tiwei Bie @ 2018-01-04  2:51 UTC (permalink / raw)
  To: Xiao Wang; +Cc: dev, yliu, stephen

Hi Xiao,

On Wed, Jan 03, 2018 at 11:41:40PM -0800, Xiao Wang wrote:
[...]
> +static int
> +virtio_dev_pause(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +
> +	if (hw->started == 0)
> +		return -1;
> +	hw->started = 0;
> +	/*
> +	 * Prevent the worker thread from touching queues to avoid contention,
> +	 * 1 ms should be enough for the ongoing Tx function to finish.
> +	 */
> +	rte_delay_ms(1);
> +	return 0;
> +}
> +
> +static void
> +virtio_dev_resume(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +
> +	hw->started = 1;
> +}

Based on your current implementation, hw->state_lock needs to
be held during a call of virtio_dev_pause()..virtio_dev_resume().
So I think the code would be more readable and much easier to
use if we take the lock in virtio_dev_pause() and release the
lock in virtio_dev_resume().

> +
> +static void
> +virtio_notify_peers(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +	struct virtnet_tx *txvq = dev->data->tx_queues[0];
> +	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
> +
> +	hw->rarp_buf[0] = rte_mbuf_raw_alloc(rxvq->mpool);
> +	if (hw->rarp_buf[0] == NULL) {
> +		PMD_DRV_LOG(ERR, "first mbuf allocate failed");
> +		return;
> +	}
> +
> +	if (make_rarp_packet(hw->rarp_buf[0],
> +				(struct ether_addr *)hw->mac_addr)) {
> +		rte_pktmbuf_free(hw->rarp_buf[0]);
> +		return;
> +	}
> +
> +	/* If virtio port just stopped, no need to send RARP */
> +	if (virtio_dev_pause(dev) < 0) {
> +		rte_pktmbuf_free(hw->rarp_buf[0]);
> +		return;
> +	}
> +
> +	dev->tx_pkt_burst(txvq, hw->rarp_buf, 1);

You have already provided virtio_dev_pause()/virtio_dev_resume().
I think you can also make this part generic and provide an inject
function, e.g.:

uint16_t
virtio_inject_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t
nb_pkts)
{
	......

	txvq->inject_pkts = tx_pkts;
	nb_tx = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
	txvq->inject_pkts = NULL;

	return nb_tx;
}

And you can introduce virtio_dev_pause()/virtio_dev_resume()/
virtio_injec... in a separate patch. And introduce the GUEST
ANNOUNCE support in the third patch.

Thanks,
Tiwei

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v3 2/2] net/virtio: support GUEST ANNOUNCE
  2018-01-04  2:51           ` Tiwei Bie
@ 2018-01-04  7:11             ` Wang, Xiao W
  0 siblings, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-04  7:11 UTC (permalink / raw)
  To: Bie, Tiwei; +Cc: dev, yliu, stephen

Hi Tiwei,

> -----Original Message-----
> From: Bie, Tiwei
> Sent: Thursday, January 4, 2018 10:51 AM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: dev@dpdk.org; yliu@fridaylinux.org; stephen@networkplumber.org
> Subject: Re: [PATCH v3 2/2] net/virtio: support GUEST ANNOUNCE
> 
> Hi Xiao,
> 
> On Wed, Jan 03, 2018 at 11:41:40PM -0800, Xiao Wang wrote:
> [...]
> > +static int
> > +virtio_dev_pause(struct rte_eth_dev *dev)
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +
> > +	if (hw->started == 0)
> > +		return -1;
> > +	hw->started = 0;
> > +	/*
> > +	 * Prevent the worker thread from touching queues to avoid
> contention,
> > +	 * 1 ms should be enough for the ongoing Tx function to finish.
> > +	 */
> > +	rte_delay_ms(1);
> > +	return 0;
> > +}
> > +
> > +static void
> > +virtio_dev_resume(struct rte_eth_dev *dev)
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +
> > +	hw->started = 1;
> > +}
> 
> Based on your current implementation, hw->state_lock needs to
> be held during a call of virtio_dev_pause()..virtio_dev_resume().
> So I think the code would be more readable and much easier to
> use if we take the lock in virtio_dev_pause() and release the
> lock in virtio_dev_resume().

Agree, will improve it in next version.

> 
> > +
> > +static void
> > +virtio_notify_peers(struct rte_eth_dev *dev)
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +	struct virtnet_tx *txvq = dev->data->tx_queues[0];
> > +	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
> > +
> > +	hw->rarp_buf[0] = rte_mbuf_raw_alloc(rxvq->mpool);
> > +	if (hw->rarp_buf[0] == NULL) {
> > +		PMD_DRV_LOG(ERR, "first mbuf allocate failed");
> > +		return;
> > +	}
> > +
> > +	if (make_rarp_packet(hw->rarp_buf[0],
> > +				(struct ether_addr *)hw->mac_addr)) {
> > +		rte_pktmbuf_free(hw->rarp_buf[0]);
> > +		return;
> > +	}
> > +
> > +	/* If virtio port just stopped, no need to send RARP */
> > +	if (virtio_dev_pause(dev) < 0) {
> > +		rte_pktmbuf_free(hw->rarp_buf[0]);
> > +		return;
> > +	}
> > +
> > +	dev->tx_pkt_burst(txvq, hw->rarp_buf, 1);
> 
> You have already provided virtio_dev_pause()/virtio_dev_resume().
> I think you can also make this part generic and provide an inject
> function, e.g.:
> 
> uint16_t
> virtio_inject_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t
> nb_pkts)
> {
> 	......
> 
> 	txvq->inject_pkts = tx_pkts;
> 	nb_tx = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
> 	txvq->inject_pkts = NULL;
> 
> 	return nb_tx;
> }
> 
> And you can introduce virtio_dev_pause()/virtio_dev_resume()/
> virtio_injec... in a separate patch. And introduce the GUEST
> ANNOUNCE support in the third patch.

That would be a better patch organization, thanks! Will make a v4.

BRs,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v3 0/2] net/virtio: support GUEST ANNOUNCE
  2017-12-04 14:02     ` [dpdk-dev] [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2017-12-04  8:46       ` Tiwei Bie
  2017-12-06 11:23       ` Tiwei Bie
@ 2018-01-04  7:41       ` Xiao Wang
  2018-01-04  7:41         ` [dpdk-dev] [PATCH v3 1/2] net/virtio: make control queue thread-safe Xiao Wang
  2018-01-04  7:41         ` [dpdk-dev] [PATCH v3 2/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2 siblings, 2 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-04  7:41 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

When live migration is finished, the backup VM needs to proactively announce
its new location. DPDK vhost has implemented VHOST_USER_PROTOCOL_F_RARP to
generate a RARP packet to switch in dequeue path. Another method is to let
the guest proactively send out RARP packet using VIRTIO_NET_F_GUEST_ANNOUNCE
feature.

This patch set enables this feature in the virtio PMD, so that a VM running
the virtio PMD can be migrated even when vhost does not support RARP generation.

v3:
- Remove Tx function code duplication, use a special pointer for rarp
  injection.
- Rename function generate_rarp to virtio_notify_peers, replace
  'virtnet_' with 'virtio_'.
- Add comment for state_lock.
- Typo fix and comment improvement.

v2:
- Use spaces instead of tabs between the code and comments.
- Remove unnecessary parentheses.
- Use rte_pktmbuf_mtod directly to get eth_hdr addr.
- Fix virtio_dev_pause return value check.

Xiao Wang (2):
  net/virtio: make control queue thread-safe
  net/virtio: support GUEST ANNOUNCE

 drivers/net/virtio/virtio_ethdev.c      | 145 +++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h      |   1 +
 drivers/net/virtio/virtio_pci.h         |   7 ++
 drivers/net/virtio/virtio_rxtx.c        |   3 +-
 drivers/net/virtio/virtio_rxtx.h        |   1 +
 drivers/net/virtio/virtio_rxtx_simple.c |   2 +-
 drivers/net/virtio/virtqueue.h          |  11 +++
 7 files changed, 165 insertions(+), 5 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v3 1/2] net/virtio: make control queue thread-safe
  2018-01-04  7:41       ` [dpdk-dev] [PATCH v3 0/2] " Xiao Wang
@ 2018-01-04  7:41         ` Xiao Wang
  2018-01-04  7:41         ` [dpdk-dev] [PATCH v3 2/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
  1 sibling, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-04  7:41 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

The virtio_send_command function may be called from app's configuration
routine, but also from an interrupt handler called when live migration is
done on the backup side. So this patch makes control queue thread-safe
first.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
v2:
- Use spaces instead of tabs between the code and comments.
- Remove unnecessary parentheses.
---
 drivers/net/virtio/virtio_ethdev.c | 7 ++++++-
 drivers/net/virtio/virtio_rxtx.c   | 1 +
 drivers/net/virtio/virtio_rxtx.h   | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index e0328f6..ac73950 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -177,6 +177,8 @@ struct rte_virtio_xstats_name_off {
 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
 		return -1;
 	}
+
+	rte_spinlock_lock(&cvq->sl);
 	vq = cvq->vq;
 	head = vq->vq_desc_head_idx;
 
@@ -184,8 +186,10 @@ struct rte_virtio_xstats_name_off {
 		"vq->hw->cvq = %p vq = %p",
 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
 
-	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
+	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
+		rte_spinlock_unlock(&cvq->sl);
 		return -1;
+	}
 
 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
 		sizeof(struct virtio_pmd_ctrl));
@@ -261,6 +265,7 @@ struct rte_virtio_xstats_name_off {
 
 	result = cvq->virtio_net_hdr_mz->addr;
 
+	rte_spinlock_unlock(&cvq->sl);
 	return result->status;
 }
 
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 390c137..6a24fde 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -407,6 +407,7 @@
 	struct virtio_hw *hw = dev->data->dev_private;
 
 	if (hw->cvq && hw->cvq->vq) {
+		rte_spinlock_init(&hw->cvq->sl);
 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
 	}
 }
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 54f1e84..71b5798 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -84,6 +84,7 @@ struct virtnet_ctl {
 	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
 	uint16_t port_id;               /**< Device port identifier. */
 	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring. */
+	rte_spinlock_t sl;              /**< spinlock for control queue. */
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v3 2/2] net/virtio: support GUEST ANNOUNCE
  2018-01-04  7:41       ` [dpdk-dev] [PATCH v3 0/2] " Xiao Wang
  2018-01-04  7:41         ` [dpdk-dev] [PATCH v3 1/2] net/virtio: make control queue thread-safe Xiao Wang
@ 2018-01-04  7:41         ` Xiao Wang
  2018-01-04  2:51           ` Tiwei Bie
  2018-01-04 15:59           ` [dpdk-dev] [PATCH v4 0/3] " Xiao Wang
  1 sibling, 2 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-04  7:41 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

When live migration is done, for the backup VM, either the virtio
frontend or the vhost backend needs to send out gratuitous RARP packet
to announce its new network location.

This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
migration scenario where the vhost backend doesn't have the ability to
generate RARP packet.

Brief introduction of the work flow:
1. QEMU finishes live migration, pokes the backup VM with an interrupt.
2. Virtio interrupt handler reads out the interrupt status value, and
   realizes it needs to send out RARP packet to announce its location.
3. Pause device to stop worker thread touching the queues.
4. Inject a RARP packet into a Tx Queue.
5. Ack the interrupt via control queue.
6. Resume device to continue packet processing.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
v3:
- Remove Tx function code duplication, use a special pointer for rarp
  injection.
- Rename function generate_rarp to virtio_notify_peers, replace
  'virtnet_' with 'virtio_'.
- Add comment for state_lock.
- Typo fix and comment improvement.

v2:
- Use rte_pktmbuf_mtod directly to get eth_hdr addr.
- Fix virtio_dev_pause return value check.
---
 drivers/net/virtio/virtio_ethdev.c      | 138 +++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h      |   1 +
 drivers/net/virtio/virtio_pci.h         |   7 ++
 drivers/net/virtio/virtio_rxtx.c        |   2 +-
 drivers/net/virtio/virtio_rxtx_simple.c |   2 +-
 drivers/net/virtio/virtqueue.h          |  11 +++
 6 files changed, 157 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index ac73950..80bad52 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -48,6 +48,8 @@
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_arp.h>
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_cpuflags.h>
@@ -55,6 +57,7 @@
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_dev.h>
+#include <rte_cycles.h>
 
 #include "virtio_ethdev.h"
 #include "virtio_pci.h"
@@ -106,6 +109,13 @@ static int virtio_dev_queue_stats_mapping_set(
 	uint8_t stat_idx,
 	uint8_t is_rx);
 
+static int make_rarp_packet(struct rte_mbuf *rarp_mbuf,
+		const struct ether_addr *mac);
+static int virtio_dev_pause(struct rte_eth_dev *dev);
+static void virtio_dev_resume(struct rte_eth_dev *dev);
+static void virtio_notify_peers(struct rte_eth_dev *dev);
+static void virtio_ack_link_announce(struct rte_eth_dev *dev);
+
 /*
  * The set of PCI devices this driver supports
  */
@@ -1249,9 +1259,116 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
 	return 0;
 }
 
+#define RARP_PKT_SIZE	64
+static int
+make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
+{
+	struct ether_hdr *eth_hdr;
+	struct arp_hdr  *rarp;
+
+	if (rarp_mbuf->buf_len < RARP_PKT_SIZE) {
+		PMD_DRV_LOG(ERR, "mbuf size too small %u (< %d)",
+				rarp_mbuf->buf_len, RARP_PKT_SIZE);
+		return -1;
+	}
+
+	/* Ethernet header. */
+	eth_hdr = rte_pktmbuf_mtod(rarp_mbuf, struct ether_hdr *);
+	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
+	ether_addr_copy(mac, &eth_hdr->s_addr);
+	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
+
+	/* RARP header. */
+	rarp = (struct arp_hdr *)(eth_hdr + 1);
+	rarp->arp_hrd = htons(ARP_HRD_ETHER);
+	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
+	rarp->arp_hln = ETHER_ADDR_LEN;
+	rarp->arp_pln = 4;
+	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
+
+	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
+	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
+	memset(&rarp->arp_data.arp_sip, 0x00, 4);
+	memset(&rarp->arp_data.arp_tip, 0x00, 4);
+
+	rarp_mbuf->data_len = RARP_PKT_SIZE;
+	rarp_mbuf->pkt_len = RARP_PKT_SIZE;
+
+	return 0;
+}
+
+static int
+virtio_dev_pause(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	if (hw->started == 0)
+		return -1;
+	hw->started = 0;
+	/*
+	 * Prevent the worker thread from touching queues to avoid contention,
+	 * 1 ms should be enough for the ongoing Tx function to finish.
+	 */
+	rte_delay_ms(1);
+	return 0;
+}
+
+static void
+virtio_dev_resume(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	hw->started = 1;
+}
+
+static void
+virtio_notify_peers(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_tx *txvq = dev->data->tx_queues[0];
+	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
+
+	hw->rarp_buf[0] = rte_mbuf_raw_alloc(rxvq->mpool);
+	if (hw->rarp_buf[0] == NULL) {
+		PMD_DRV_LOG(ERR, "first mbuf allocate failed");
+		return;
+	}
+
+	if (make_rarp_packet(hw->rarp_buf[0],
+				(struct ether_addr *)hw->mac_addr)) {
+		rte_pktmbuf_free(hw->rarp_buf[0]);
+		return;
+	}
+
+	/* If virtio port just stopped, no need to send RARP */
+	if (virtio_dev_pause(dev) < 0) {
+		rte_pktmbuf_free(hw->rarp_buf[0]);
+		return;
+	}
+
+	dev->tx_pkt_burst(txvq, hw->rarp_buf, 1);
+	/* Recover the stored hw status to let worker thread continue */
+	virtio_dev_resume(dev);
+}
+
+static void
+virtio_ack_link_announce(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+	int len;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
+	len = 0;
+
+	virtio_send_command(hw->cvq, &ctrl, &len, 0);
+}
+
 /*
- * Process Virtio Config changed interrupt and call the callback
- * if link state changed.
+ * Process virtio config changed interrupt. Call the callback
+ * if link state changed, generate gratuitous RARP packet if
+ * the status indicates an ANNOUNCE.
  */
 void
 virtio_interrupt_handler(void *param)
@@ -1274,6 +1391,12 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
 						      NULL, NULL);
 	}
 
+	if (isr & VIRTIO_NET_S_ANNOUNCE) {
+		rte_spinlock_lock(&hw->state_lock);
+		virtio_notify_peers(dev);
+		virtio_ack_link_announce(dev);
+		rte_spinlock_unlock(&hw->state_lock);
+	}
 }
 
 /* set rx and tx handlers according to what is supported */
@@ -1786,6 +1909,8 @@ static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev)
 			return -EBUSY;
 		}
 
+	rte_spinlock_init(&hw->state_lock);
+
 	hw->use_simple_rx = 1;
 	hw->use_simple_tx = 1;
 
@@ -1892,6 +2017,12 @@ static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev)
 	/* Initialize Link state */
 	virtio_dev_link_update(dev, 0);
 
+	hw->rarp_buf = rte_zmalloc("rarp_buf", sizeof(struct rte_mbuf *), 0);
+	if (!hw->rarp_buf) {
+		PMD_INIT_LOG(ERR, "Failed to allocate rarp pointer");
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
@@ -1952,12 +2083,15 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
+	rte_spinlock_lock(&hw->state_lock);
 	if (intr_conf->lsc || intr_conf->rxq)
 		virtio_intr_disable(dev);
 
 	hw->started = 0;
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
+	rte_free(hw->rarp_buf);
+	rte_spinlock_unlock(&hw->state_lock);
 }
 
 static int
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 2039bc5..58faa22 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -67,6 +67,7 @@
 	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
 	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
 	 1u << VIRTIO_NET_F_MTU	| \
+	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE  | \
 	 1u << VIRTIO_RING_F_INDIRECT_DESC |    \
 	 1ULL << VIRTIO_F_VERSION_1       |	\
 	 1ULL << VIRTIO_F_IOMMU_PLATFORM)
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 3c5ce66..7bd6c50 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -270,6 +270,13 @@ struct virtio_hw {
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	void	    *virtio_user_dev;
+	/*
+	 * App management thread and virtio interrupt handler thread
+	 * both can change the 'started' flag, this lock is meant to
+	 * avoid such a contention.
+	 */
+	rte_spinlock_t state_lock;
+	struct rte_mbuf **rarp_buf;
 
 	struct virtqueue **vqs;
 };
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 6a24fde..6bae8a6 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -1017,7 +1017,7 @@
 	uint16_t nb_used, nb_tx = 0;
 	int error;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0) && tx_pkts != hw->rarp_buf)
 		return nb_tx;
 
 	if (unlikely(nb_pkts < 1))
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index b5bc1c4..a04f20e 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -99,7 +99,7 @@ int __attribute__((cold))
 	uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1;
 	uint16_t nb_tx = 0;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0) && tx_pkts != hw->rarp_buf)
 		return nb_tx;
 
 	nb_used = VIRTQUEUE_NUSED(vq);
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 2305d91..d9045e1 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -158,6 +158,17 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 #define VIRTIO_NET_CTRL_VLAN_DEL 1
 
+/*
+ * Control link announce acknowledgement
+ *
+ * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
+ * driver has received the notification; device would clear the
+ * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
+ * this command.
+ */
+#define VIRTIO_NET_CTRL_ANNOUNCE     3
+#define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
+
 struct virtio_net_ctrl_hdr {
 	uint8_t class;
 	uint8_t cmd;
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/3] net/virtio: add packet injection method
  2018-01-04 15:59             ` [dpdk-dev] [PATCH v4 2/3] net/virtio: add packet injection method Xiao Wang
@ 2018-01-04  7:56               ` Tiwei Bie
  2018-01-05 16:46               ` [dpdk-dev] [PATCH v5 0/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-05 20:27               ` [dpdk-dev] [PATCH v4 2/3] net/virtio: add packet injection method Stephen Hemminger
  2 siblings, 0 replies; 112+ messages in thread
From: Tiwei Bie @ 2018-01-04  7:56 UTC (permalink / raw)
  To: Xiao Wang; +Cc: dev, yliu, stephen

On Thu, Jan 04, 2018 at 07:59:37AM -0800, Xiao Wang wrote:
[...]
> +void
> +virtio_inject_pkts(struct rte_eth_dev *dev, struct virtnet_tx *txvq,
> +		struct rte_mbuf **buf, int count)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +
> +	hw->special_buf = buf;

I think maybe you can give it (hw->special_buf) a
better (more meaningful) name.

You don't need to put txvq in the param list based
on your current implementation. Otherwise, you need
to make special_buf a per-queue variable.

> +	dev->tx_pkt_burst(txvq, buf, count);
> +}
> +

You need to zero the hw->special_buf after calling
tx_pkt_burst().

You should also return the retval of tx_pkt_burst()
to the caller.

Thanks,
Tiwei

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/3] net/virtio: support GUEST ANNOUNCE
  2018-01-04 15:59             ` [dpdk-dev] [PATCH v4 3/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2018-01-04 11:13               ` Tiwei Bie
  0 siblings, 0 replies; 112+ messages in thread
From: Tiwei Bie @ 2018-01-04 11:13 UTC (permalink / raw)
  To: Xiao Wang; +Cc: dev, yliu, stephen

On Thu, Jan 04, 2018 at 07:59:38AM -0800, Xiao Wang wrote:
[...]
> +static void
> +virtio_notify_peers(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +	struct virtnet_tx *txvq = dev->data->tx_queues[0];
> +	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
> +	struct rte_mbuf **rarp_buf;
> +
> +	rarp_buf = rte_zmalloc("rarp_buf", sizeof(struct rte_mbuf *), 0);
> +	if (!rarp_buf) {
> +		PMD_INIT_LOG(ERR, "Failed to allocate rarp pointer");
> +		return;
> +	}
> +
> +	rarp_buf[0] = rte_mbuf_raw_alloc(rxvq->mpool);
> +	if (rarp_buf[0] == NULL) {
> +		PMD_DRV_LOG(ERR, "first mbuf allocate free_bufed");
> +		goto free_buf;
> +	}
> +
> +	if (make_rarp_packet(rarp_buf[0],
> +				(struct ether_addr *)hw->mac_addr)) {
> +		rte_pktmbuf_free(rarp_buf[0]);
> +		goto free_buf;
> +	}
> +
> +	/* If virtio port just stopped, no need to send RARP */
> +	if (virtio_dev_pause(dev) < 0) {
> +		rte_pktmbuf_free(rarp_buf[0]);
> +		goto free_buf;
> +	}
> +
> +	virtio_inject_pkts(dev, txvq, rarp_buf, 1);

You don't need to define rarp_buf as `struct rte_mbuf **`,
and dynamically alloc the mbuf pointer. You could alloc a
mbuf pointer on the stack directly, e.g.:

	struct rte_mbuf *rarp_mbuf;
	rarp_mbuf = rte_pktmbuf_alloc(...);
	...
	if (make_rarp_packet(rarp_mbuf, ...))
		...
	virtio_inject_pkts(..., &rarp_mbuf, 1);

Thanks,
Tiwei

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v4 0/3] net/virtio: support GUEST ANNOUNCE
  2018-01-04  7:41         ` [dpdk-dev] [PATCH v3 2/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-04  2:51           ` Tiwei Bie
@ 2018-01-04 15:59           ` Xiao Wang
  2018-01-04 15:59             ` [dpdk-dev] [PATCH v4 1/3] net/virtio: make control queue thread-safe Xiao Wang
                               ` (2 more replies)
  1 sibling, 3 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-04 15:59 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

v4:
- Move spinlock lock/unlock into dev_pause/resume.
- Separate out a patch for packet injection.

v3:
- Remove Tx function code duplication, use a special pointer for rarp
  injection.
- Rename function generate_rarp to virtio_notify_peers, replace
  'virtnet_' with 'virtio_'.
- Add comment for state_lock.
- Typo fix and comment improvement.

v2:
- Use spaces instead of tabs between the code and comments.
- Remove unnecessary parentheses.
- Use rte_pktmbuf_mtod directly to get eth_hdr addr.
- Fix virtio_dev_pause return value check.

Xiao Wang (2):
  net/virtio: make control queue thread-safe
  net/virtio: support GUEST ANNOUNCE

 drivers/net/virtio/virtio_ethdev.c      | 145 +++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h      |   1 +
 drivers/net/virtio/virtio_pci.h         |   7 ++
 drivers/net/virtio/virtio_rxtx.c        |   3 +-
 drivers/net/virtio/virtio_rxtx.h        |   1 +
 drivers/net/virtio/virtio_rxtx_simple.c |   2 +-

Xiao Wang (3):
  net/virtio: make control queue thread-safe
  net/virtio: add packet injection method
  net/virtio: support GUEST ANNOUNCE

 drivers/net/virtio/virtio_ethdev.c      | 159 +++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h      |   7 ++
 drivers/net/virtio/virtio_pci.h         |   7 ++
 drivers/net/virtio/virtio_rxtx.c        |   3 +-
 drivers/net/virtio/virtio_rxtx.h        |   1 +
 drivers/net/virtio/virtio_rxtx_simple.c |   2 +-
 drivers/net/virtio/virtqueue.h          |  11 +++
 7 files changed, 185 insertions(+), 5 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v4 1/3] net/virtio: make control queue thread-safe
  2018-01-04 15:59           ` [dpdk-dev] [PATCH v4 0/3] " Xiao Wang
@ 2018-01-04 15:59             ` Xiao Wang
  2018-01-04 15:59             ` [dpdk-dev] [PATCH v4 2/3] net/virtio: add packet injection method Xiao Wang
  2018-01-04 15:59             ` [dpdk-dev] [PATCH v4 3/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-04 15:59 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

The virtio_send_command function may be called from app's configuration
routine, but also from an interrupt handler called when live migration is
done on the backup side. So this patch makes control queue thread-safe
first.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 7 ++++++-
 drivers/net/virtio/virtio_rxtx.c   | 1 +
 drivers/net/virtio/virtio_rxtx.h   | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index e0328f6..ac73950 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -177,6 +177,8 @@ struct rte_virtio_xstats_name_off {
 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
 		return -1;
 	}
+
+	rte_spinlock_lock(&cvq->sl);
 	vq = cvq->vq;
 	head = vq->vq_desc_head_idx;
 
@@ -184,8 +186,10 @@ struct rte_virtio_xstats_name_off {
 		"vq->hw->cvq = %p vq = %p",
 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
 
-	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
+	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
+		rte_spinlock_unlock(&cvq->sl);
 		return -1;
+	}
 
 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
 		sizeof(struct virtio_pmd_ctrl));
@@ -261,6 +265,7 @@ struct rte_virtio_xstats_name_off {
 
 	result = cvq->virtio_net_hdr_mz->addr;
 
+	rte_spinlock_unlock(&cvq->sl);
 	return result->status;
 }
 
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 390c137..6a24fde 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -407,6 +407,7 @@
 	struct virtio_hw *hw = dev->data->dev_private;
 
 	if (hw->cvq && hw->cvq->vq) {
+		rte_spinlock_init(&hw->cvq->sl);
 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
 	}
 }
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 54f1e84..71b5798 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -84,6 +84,7 @@ struct virtnet_ctl {
 	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
 	uint16_t port_id;               /**< Device port identifier. */
 	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring. */
+	rte_spinlock_t sl;              /**< spinlock for control queue. */
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v4 2/3] net/virtio: add packet injection method
  2018-01-04 15:59           ` [dpdk-dev] [PATCH v4 0/3] " Xiao Wang
  2018-01-04 15:59             ` [dpdk-dev] [PATCH v4 1/3] net/virtio: make control queue thread-safe Xiao Wang
@ 2018-01-04 15:59             ` Xiao Wang
  2018-01-04  7:56               ` Tiwei Bie
                                 ` (2 more replies)
  2018-01-04 15:59             ` [dpdk-dev] [PATCH v4 3/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2 siblings, 3 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-04 15:59 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

This patch adds dev_pause, dev_resume and inject_pkts api to allow
driver to pause the worker thread and inject special packets into
Tx queue. The next patch will be based on this.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c      | 44 +++++++++++++++++++++++++++++++++
 drivers/net/virtio/virtio_ethdev.h      |  6 +++++
 drivers/net/virtio/virtio_pci.h         |  7 ++++++
 drivers/net/virtio/virtio_rxtx.c        |  2 +-
 drivers/net/virtio/virtio_rxtx_simple.c |  2 +-
 5 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index ac73950..6745de7 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -55,6 +55,7 @@
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_dev.h>
+#include <rte_cycles.h>
 
 #include "virtio_ethdev.h"
 #include "virtio_pci.h"
@@ -1249,6 +1250,45 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
 	return 0;
 }
 
+int
+virtio_dev_pause(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	rte_spinlock_lock(&hw->state_lock);
+
+	if (hw->started == 0) {
+		rte_spinlock_unlock(&hw->state_lock);
+		return -1;
+	}
+	hw->started = 0;
+	/*
+	 * Prevent the worker thread from touching queues to avoid contention,
+	 * 1 ms should be enough for the ongoing Tx function to finish.
+	 */
+	rte_delay_ms(1);
+	return 0;
+}
+
+void
+virtio_dev_resume(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	hw->started = 1;
+	rte_spinlock_unlock(&hw->state_lock);
+}
+
+void
+virtio_inject_pkts(struct rte_eth_dev *dev, struct virtnet_tx *txvq,
+		struct rte_mbuf **buf, int count)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	hw->special_buf = buf;
+	dev->tx_pkt_burst(txvq, buf, count);
+}
+
 /*
  * Process Virtio Config changed interrupt and call the callback
  * if link state changed.
@@ -1786,6 +1826,8 @@ static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev)
 			return -EBUSY;
 		}
 
+	rte_spinlock_init(&hw->state_lock);
+
 	hw->use_simple_rx = 1;
 	hw->use_simple_tx = 1;
 
@@ -1952,12 +1994,14 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
+	rte_spinlock_lock(&hw->state_lock);
 	if (intr_conf->lsc || intr_conf->rxq)
 		virtio_intr_disable(dev);
 
 	hw->started = 0;
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
+	rte_spinlock_unlock(&hw->state_lock);
 }
 
 static int
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 2039bc5..e973de3 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -37,6 +37,7 @@
 #include <stdint.h>
 
 #include "virtio_pci.h"
+#include "virtio_rxtx.h"
 
 #define SPEED_10	10
 #define SPEED_100	100
@@ -121,4 +122,9 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 void virtio_interrupt_handler(void *param);
 
+int virtio_dev_pause(struct rte_eth_dev *dev);
+void virtio_dev_resume(struct rte_eth_dev *dev);
+void virtio_inject_pkts(struct rte_eth_dev *dev,
+		struct virtnet_tx *txvq, struct rte_mbuf **buf, int count);
+
 #endif /* _VIRTIO_ETHDEV_H_ */
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 3c5ce66..8d91320 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -270,6 +270,13 @@ struct virtio_hw {
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	void	    *virtio_user_dev;
+	/*
+	 * App management thread and virtio interrupt handler thread
+	 * both can change the 'started' flag, this lock is meant to
+	 * avoid such a contention.
+	 */
+	rte_spinlock_t state_lock;
+	struct rte_mbuf **special_buf;
 
 	struct virtqueue **vqs;
 };
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 6a24fde..1438f05 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -1017,7 +1017,7 @@
 	uint16_t nb_used, nb_tx = 0;
 	int error;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0) && tx_pkts != hw->special_buf)
 		return nb_tx;
 
 	if (unlikely(nb_pkts < 1))
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index b5bc1c4..b3f5d2e 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -99,7 +99,7 @@ int __attribute__((cold))
 	uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1;
 	uint16_t nb_tx = 0;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0) && tx_pkts != hw->special_buf)
 		return nb_tx;
 
 	nb_used = VIRTQUEUE_NUSED(vq);
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v4 3/3] net/virtio: support GUEST ANNOUNCE
  2018-01-04 15:59           ` [dpdk-dev] [PATCH v4 0/3] " Xiao Wang
  2018-01-04 15:59             ` [dpdk-dev] [PATCH v4 1/3] net/virtio: make control queue thread-safe Xiao Wang
  2018-01-04 15:59             ` [dpdk-dev] [PATCH v4 2/3] net/virtio: add packet injection method Xiao Wang
@ 2018-01-04 15:59             ` Xiao Wang
  2018-01-04 11:13               ` Tiwei Bie
  2 siblings, 1 reply; 112+ messages in thread
From: Xiao Wang @ 2018-01-04 15:59 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

When live migration is done, for the backup VM, either the virtio
frontend or the vhost backend needs to send out a gratuitous RARP
packet to announce its new network location.

This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
migration scenario where the vhost backend doesn't have the ability to
generate RARP packet.

Brief introduction of the work flow:
1. QEMU finishes live migration, pokes the backup VM with an interrupt.
2. Virtio interrupt handler reads out the interrupt status value, and
   realizes it needs to send out RARP packet to announce its location.
3. Pause device to stop worker thread touching the queues.
4. Inject a RARP packet into a Tx Queue.
5. Resume device to continue packet processing.
6. Ack the interrupt via control queue.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 108 ++++++++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h |   1 +
 drivers/net/virtio/virtqueue.h     |  11 ++++
 3 files changed, 118 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 6745de7..288a1a7 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -48,6 +48,8 @@
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_arp.h>
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_cpuflags.h>
@@ -107,6 +109,11 @@ static int virtio_dev_queue_stats_mapping_set(
 	uint8_t stat_idx,
 	uint8_t is_rx);
 
+static int make_rarp_packet(struct rte_mbuf *rarp_mbuf,
+		const struct ether_addr *mac);
+static void virtio_notify_peers(struct rte_eth_dev *dev);
+static void virtio_ack_link_announce(struct rte_eth_dev *dev);
+
 /*
  * The set of PCI devices this driver supports
  */
@@ -1289,9 +1296,102 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
 	dev->tx_pkt_burst(txvq, buf, count);
 }
 
+#define RARP_PKT_SIZE	64
+static int
+make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
+{
+	struct ether_hdr *eth_hdr;
+	struct arp_hdr  *rarp;
+
+	if (rarp_mbuf->buf_len < RARP_PKT_SIZE) {
+		PMD_DRV_LOG(ERR, "mbuf size too small %u (< %d)",
+				rarp_mbuf->buf_len, RARP_PKT_SIZE);
+		return -1;
+	}
+
+	/* Ethernet header. */
+	eth_hdr = rte_pktmbuf_mtod(rarp_mbuf, struct ether_hdr *);
+	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
+	ether_addr_copy(mac, &eth_hdr->s_addr);
+	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
+
+	/* RARP header. */
+	rarp = (struct arp_hdr *)(eth_hdr + 1);
+	rarp->arp_hrd = htons(ARP_HRD_ETHER);
+	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
+	rarp->arp_hln = ETHER_ADDR_LEN;
+	rarp->arp_pln = 4;
+	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
+
+	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
+	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
+	memset(&rarp->arp_data.arp_sip, 0x00, 4);
+	memset(&rarp->arp_data.arp_tip, 0x00, 4);
+
+	rarp_mbuf->data_len = RARP_PKT_SIZE;
+	rarp_mbuf->pkt_len = RARP_PKT_SIZE;
+
+	return 0;
+}
+
+static void
+virtio_notify_peers(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_tx *txvq = dev->data->tx_queues[0];
+	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
+	struct rte_mbuf **rarp_buf;
+
+	rarp_buf = rte_zmalloc("rarp_buf", sizeof(struct rte_mbuf *), 0);
+	if (!rarp_buf) {
+		PMD_INIT_LOG(ERR, "Failed to allocate rarp pointer");
+		return;
+	}
+
+	rarp_buf[0] = rte_mbuf_raw_alloc(rxvq->mpool);
+	if (rarp_buf[0] == NULL) {
+		PMD_DRV_LOG(ERR, "first mbuf allocate free_bufed");
+		goto free_buf;
+	}
+
+	if (make_rarp_packet(rarp_buf[0],
+				(struct ether_addr *)hw->mac_addr)) {
+		rte_pktmbuf_free(rarp_buf[0]);
+		goto free_buf;
+	}
+
+	/* If virtio port just stopped, no need to send RARP */
+	if (virtio_dev_pause(dev) < 0) {
+		rte_pktmbuf_free(rarp_buf[0]);
+		goto free_buf;
+	}
+
+	virtio_inject_pkts(dev, txvq, rarp_buf, 1);
+	/* Recover the stored hw status to let worker thread continue */
+	virtio_dev_resume(dev);
+
+free_buf:
+	rte_free(rarp_buf);
+}
+
+static void
+virtio_ack_link_announce(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+	int len;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
+	len = 0;
+
+	virtio_send_command(hw->cvq, &ctrl, &len, 0);
+}
+
 /*
- * Process Virtio Config changed interrupt and call the callback
- * if link state changed.
+ * Process virtio config changed interrupt. Call the callback
+ * if link state changed, generate gratuitous RARP packet if
+ * the status indicates an ANNOUNCE.
  */
 void
 virtio_interrupt_handler(void *param)
@@ -1314,6 +1414,10 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
 						      NULL, NULL);
 	}
 
+	if (isr & VIRTIO_NET_S_ANNOUNCE) {
+		virtio_notify_peers(dev);
+		virtio_ack_link_announce(dev);
+	}
 }
 
 /* set rx and tx handlers according to what is supported */
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index e973de3..13a5c86 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -68,6 +68,7 @@
 	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
 	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
 	 1u << VIRTIO_NET_F_MTU	| \
+	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE  | \
 	 1u << VIRTIO_RING_F_INDIRECT_DESC |    \
 	 1ULL << VIRTIO_F_VERSION_1       |	\
 	 1ULL << VIRTIO_F_IOMMU_PLATFORM)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 2305d91..d9045e1 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -158,6 +158,17 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 #define VIRTIO_NET_CTRL_VLAN_DEL 1
 
+/*
+ * Control link announce acknowledgement
+ *
+ * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
+ * driver has received the notification; device would clear the
+ * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
+ * this command.
+ */
+#define VIRTIO_NET_CTRL_ANNOUNCE     3
+#define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
+
 struct virtio_net_ctrl_hdr {
 	uint8_t class;
 	uint8_t cmd;
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v5 0/3] net/virtio: support GUEST ANNOUNCE
  2018-01-04 15:59             ` [dpdk-dev] [PATCH v4 2/3] net/virtio: add packet injection method Xiao Wang
  2018-01-04  7:56               ` Tiwei Bie
@ 2018-01-05 16:46               ` Xiao Wang
  2018-01-05 16:46                 ` [dpdk-dev] [PATCH v5 1/3] net/virtio: make control queue thread-safe Xiao Wang
                                   ` (2 more replies)
  2018-01-05 20:27               ` [dpdk-dev] [PATCH v4 2/3] net/virtio: add packet injection method Stephen Hemminger
  2 siblings, 3 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-05 16:46 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

v5:
- Remove txvq parameter in virtio_inject_pkts.
- Zero hw->special_buf after using it.
- Return the retval of tx_pkt_burst().
- Allocate a mbuf pointer on stack directly.

v4:
- Move spinlock lock/unlock into dev_pause/resume.
- Separate out a patch for packet injection.

v3:
- Remove Tx function code duplication, use a special pointer for rarp
  injection.
- Rename function generate_rarp to virtio_notify_peers, replace
  'virtnet_' with 'virtio_'.
- Add comment for state_lock.
- Typo fix and comment improvement.

v2:
- Use spaces instead of tabs between the code and comments.
- Remove unnecessary parentheses.
- Use rte_pktmbuf_mtod directly to get eth_hdr addr.
- Fix virtio_dev_pause return value check.

Xiao Wang (3):
  net/virtio: make control queue thread-safe
  net/virtio: add packet injection method
  net/virtio: support GUEST ANNOUNCE

 drivers/net/virtio/virtio_ethdev.c      | 155 +++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h      |   7 ++
 drivers/net/virtio/virtio_pci.h         |   7 ++
 drivers/net/virtio/virtio_rxtx.c        |   3 +-
 drivers/net/virtio/virtio_rxtx.h        |   1 +
 drivers/net/virtio/virtio_rxtx_simple.c |   2 +-
 drivers/net/virtio/virtqueue.h          |  11 +++
 7 files changed, 181 insertions(+), 5 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v5 1/3] net/virtio: make control queue thread-safe
  2018-01-05 16:46               ` [dpdk-dev] [PATCH v5 0/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2018-01-05 16:46                 ` Xiao Wang
  2018-01-05 16:46                 ` [dpdk-dev] [PATCH v5 2/3] net/virtio: add packet injection method Xiao Wang
  2018-01-05 16:46                 ` [dpdk-dev] [PATCH v5 3/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-05 16:46 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

The virtio_send_command function may be called from the app's
configuration routine, but also from an interrupt handler called when
live migration is done on the backup side. So this patch makes the
control queue thread-safe first.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 7 ++++++-
 drivers/net/virtio/virtio_rxtx.c   | 1 +
 drivers/net/virtio/virtio_rxtx.h   | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index e0328f6..ac73950 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -177,6 +177,8 @@ struct rte_virtio_xstats_name_off {
 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
 		return -1;
 	}
+
+	rte_spinlock_lock(&cvq->sl);
 	vq = cvq->vq;
 	head = vq->vq_desc_head_idx;
 
@@ -184,8 +186,10 @@ struct rte_virtio_xstats_name_off {
 		"vq->hw->cvq = %p vq = %p",
 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
 
-	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
+	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
+		rte_spinlock_unlock(&cvq->sl);
 		return -1;
+	}
 
 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
 		sizeof(struct virtio_pmd_ctrl));
@@ -261,6 +265,7 @@ struct rte_virtio_xstats_name_off {
 
 	result = cvq->virtio_net_hdr_mz->addr;
 
+	rte_spinlock_unlock(&cvq->sl);
 	return result->status;
 }
 
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 390c137..6a24fde 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -407,6 +407,7 @@
 	struct virtio_hw *hw = dev->data->dev_private;
 
 	if (hw->cvq && hw->cvq->vq) {
+		rte_spinlock_init(&hw->cvq->sl);
 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
 	}
 }
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 54f1e84..71b5798 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -84,6 +84,7 @@ struct virtnet_ctl {
 	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
 	uint16_t port_id;               /**< Device port identifier. */
 	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring. */
+	rte_spinlock_t sl;              /**< spinlock for control queue. */
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v5 2/3] net/virtio: add packet injection method
  2018-01-05 16:46               ` [dpdk-dev] [PATCH v5 0/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-05 16:46                 ` [dpdk-dev] [PATCH v5 1/3] net/virtio: make control queue thread-safe Xiao Wang
@ 2018-01-05 16:46                 ` Xiao Wang
  2018-01-05 18:00                   ` Tiwei Bie
  2018-01-05 16:46                 ` [dpdk-dev] [PATCH v5 3/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2 siblings, 1 reply; 112+ messages in thread
From: Xiao Wang @ 2018-01-05 16:46 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

This patch adds the dev_pause, dev_resume and inject_pkts APIs to allow
the driver to pause the worker thread and inject special packets into
the Tx queue. The next patch will be based on this.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c      | 51 +++++++++++++++++++++++++++++++++
 drivers/net/virtio/virtio_ethdev.h      |  6 ++++
 drivers/net/virtio/virtio_pci.h         |  7 +++++
 drivers/net/virtio/virtio_rxtx.c        |  2 +-
 drivers/net/virtio/virtio_rxtx_simple.c |  2 +-
 5 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index ac73950..a2b5d34 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -55,6 +55,7 @@
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_dev.h>
+#include <rte_cycles.h>
 
 #include "virtio_ethdev.h"
 #include "virtio_pci.h"
@@ -1249,6 +1250,52 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
 	return 0;
 }
 
+int
+virtio_dev_pause(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	rte_spinlock_lock(&hw->state_lock);
+
+	if (hw->started == 0) {
+		/* In case that the device is just stopped. */
+		rte_spinlock_unlock(&hw->state_lock);
+		return -1;
+	}
+	hw->started = 0;
+	/*
+	 * Prevent the worker thread from touching queues to avoid contention,
+	 * 1 ms should be enough for the ongoing Tx function to finish.
+	 */
+	rte_delay_ms(1);
+	return 0;
+}
+
+/*
+ * Recover hw state to let worker thread continue.
+ */
+void
+virtio_dev_resume(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	hw->started = 1;
+	rte_spinlock_unlock(&hw->state_lock);
+}
+
+int
+virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **buf, int count)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_tx *txvq = dev->data->tx_queues[0];
+	int ret;
+
+	hw->inject_buf = buf;
+	ret = dev->tx_pkt_burst(txvq, buf, count);
+	hw->inject_buf = NULL;
+	return ret;
+}
+
 /*
  * Process Virtio Config changed interrupt and call the callback
  * if link state changed.
@@ -1786,6 +1833,8 @@ static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev)
 			return -EBUSY;
 		}
 
+	rte_spinlock_init(&hw->state_lock);
+
 	hw->use_simple_rx = 1;
 	hw->use_simple_tx = 1;
 
@@ -1952,12 +2001,14 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
+	rte_spinlock_lock(&hw->state_lock);
 	if (intr_conf->lsc || intr_conf->rxq)
 		virtio_intr_disable(dev);
 
 	hw->started = 0;
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
+	rte_spinlock_unlock(&hw->state_lock);
 }
 
 static int
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 2039bc5..4a2a2f0 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -37,6 +37,7 @@
 #include <stdint.h>
 
 #include "virtio_pci.h"
+#include "virtio_rxtx.h"
 
 #define SPEED_10	10
 #define SPEED_100	100
@@ -121,4 +122,9 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 void virtio_interrupt_handler(void *param);
 
+int virtio_dev_pause(struct rte_eth_dev *dev);
+void virtio_dev_resume(struct rte_eth_dev *dev);
+int virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **buf,
+		int count);
+
 #endif /* _VIRTIO_ETHDEV_H_ */
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 3c5ce66..e691fb3 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -270,6 +270,13 @@ struct virtio_hw {
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	void	    *virtio_user_dev;
+	/*
+	 * App management thread and virtio interrupt handler thread
+	 * both can change the 'started' flag, this lock is meant to
+	 * avoid such a contention.
+	 */
+	rte_spinlock_t state_lock;
+	struct rte_mbuf **inject_buf;
 
 	struct virtqueue **vqs;
 };
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 6a24fde..bbf5aaf 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -1017,7 +1017,7 @@
 	uint16_t nb_used, nb_tx = 0;
 	int error;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0) && tx_pkts != hw->inject_buf)
 		return nb_tx;
 
 	if (unlikely(nb_pkts < 1))
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index b5bc1c4..d81d162 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -99,7 +99,7 @@ int __attribute__((cold))
 	uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1;
 	uint16_t nb_tx = 0;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0) && tx_pkts != hw->inject_buf)
 		return nb_tx;
 
 	nb_used = VIRTQUEUE_NUSED(vq);
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v5 3/3] net/virtio: support GUEST ANNOUNCE
  2018-01-05 16:46               ` [dpdk-dev] [PATCH v5 0/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-05 16:46                 ` [dpdk-dev] [PATCH v5 1/3] net/virtio: make control queue thread-safe Xiao Wang
  2018-01-05 16:46                 ` [dpdk-dev] [PATCH v5 2/3] net/virtio: add packet injection method Xiao Wang
@ 2018-01-05 16:46                 ` Xiao Wang
  2018-01-05 17:56                   ` Tiwei Bie
  2018-01-07 12:05                   ` [dpdk-dev] [PATCH v6 0/3] " Xiao Wang
  2 siblings, 2 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-05 16:46 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

When live migration is done, for the backup VM, either the virtio
frontend or the vhost backend needs to send out a gratuitous RARP
packet to announce its new network location.

This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
migration scenario where the vhost backend doesn't have the ability to
generate RARP packet.

Brief introduction of the work flow:
1. QEMU finishes live migration, pokes the backup VM with an interrupt.
2. Virtio interrupt handler reads out the interrupt status value, and
   realizes it needs to send out RARP packet to announce its location.
3. Pause device to stop worker thread touching the queues.
4. Inject a RARP packet into a Tx Queue.
5. Resume device to continue packet processing.
6. Ack the interrupt via control queue.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 97 +++++++++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h |  1 +
 drivers/net/virtio/virtqueue.h     | 11 +++++
 3 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index a2b5d34..3a21098 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -48,6 +48,8 @@
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_arp.h>
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_cpuflags.h>
@@ -107,6 +109,11 @@ static int virtio_dev_queue_stats_mapping_set(
 	uint8_t stat_idx,
 	uint8_t is_rx);
 
+static int make_rarp_packet(struct rte_mbuf *rarp_mbuf,
+		const struct ether_addr *mac);
+static void virtio_notify_peers(struct rte_eth_dev *dev);
+static void virtio_ack_link_announce(struct rte_eth_dev *dev);
+
 /*
  * The set of PCI devices this driver supports
  */
@@ -1296,9 +1303,91 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
 	return ret;
 }
 
+#define RARP_PKT_SIZE	64
+static int
+make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
+{
+	struct ether_hdr *eth_hdr;
+	struct arp_hdr  *rarp;
+
+	if (rarp_mbuf->buf_len < RARP_PKT_SIZE) {
+		PMD_DRV_LOG(ERR, "mbuf size too small %u (< %d)",
+				rarp_mbuf->buf_len, RARP_PKT_SIZE);
+		return -1;
+	}
+
+	/* Ethernet header. */
+	eth_hdr = rte_pktmbuf_mtod(rarp_mbuf, struct ether_hdr *);
+	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
+	ether_addr_copy(mac, &eth_hdr->s_addr);
+	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
+
+	/* RARP header. */
+	rarp = (struct arp_hdr *)(eth_hdr + 1);
+	rarp->arp_hrd = htons(ARP_HRD_ETHER);
+	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
+	rarp->arp_hln = ETHER_ADDR_LEN;
+	rarp->arp_pln = 4;
+	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
+
+	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
+	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
+	memset(&rarp->arp_data.arp_sip, 0x00, 4);
+	memset(&rarp->arp_data.arp_tip, 0x00, 4);
+
+	rarp_mbuf->data_len = RARP_PKT_SIZE;
+	rarp_mbuf->pkt_len = RARP_PKT_SIZE;
+
+	return 0;
+}
+
+static void
+virtio_notify_peers(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
+	struct rte_mbuf *rarp_mbuf;
+
+	rarp_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
+	if (rarp_mbuf == NULL) {
+		PMD_DRV_LOG(ERR, "first mbuf allocate free_bufed");
+		return;
+	}
+
+	if (make_rarp_packet(rarp_mbuf,
+			(struct ether_addr *)hw->mac_addr) < 0) {
+		rte_pktmbuf_free(rarp_mbuf);
+		return;
+	}
+
+	/* If virtio port just stopped, no need to send RARP */
+	if (virtio_dev_pause(dev) < 0) {
+		rte_pktmbuf_free(rarp_mbuf);
+		return;
+	}
+
+	virtio_inject_pkts(dev, &rarp_mbuf, 1);
+	virtio_dev_resume(dev);
+}
+
+static void
+virtio_ack_link_announce(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+	int len;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
+	len = 0;
+
+	virtio_send_command(hw->cvq, &ctrl, &len, 0);
+}
+
 /*
- * Process Virtio Config changed interrupt and call the callback
- * if link state changed.
+ * Process virtio config changed interrupt. Call the callback
+ * if link state changed, generate gratuitous RARP packet if
+ * the status indicates an ANNOUNCE.
  */
 void
 virtio_interrupt_handler(void *param)
@@ -1321,6 +1410,10 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
 						      NULL, NULL);
 	}
 
+	if (isr & VIRTIO_NET_S_ANNOUNCE) {
+		virtio_notify_peers(dev);
+		virtio_ack_link_announce(dev);
+	}
 }
 
 /* set rx and tx handlers according to what is supported */
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 4a2a2f0..04b6a37 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -68,6 +68,7 @@
 	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
 	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
 	 1u << VIRTIO_NET_F_MTU	| \
+	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE  | \
 	 1u << VIRTIO_RING_F_INDIRECT_DESC |    \
 	 1ULL << VIRTIO_F_VERSION_1       |	\
 	 1ULL << VIRTIO_F_IOMMU_PLATFORM)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 2305d91..d9045e1 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -158,6 +158,17 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 #define VIRTIO_NET_CTRL_VLAN_DEL 1
 
+/*
+ * Control link announce acknowledgement
+ *
+ * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
+ * driver has received the notification; device would clear the
+ * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
+ * this command.
+ */
+#define VIRTIO_NET_CTRL_ANNOUNCE     3
+#define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
+
 struct virtio_net_ctrl_hdr {
 	uint8_t class;
 	uint8_t cmd;
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/3] net/virtio: support GUEST ANNOUNCE
  2018-01-05 16:46                 ` [dpdk-dev] [PATCH v5 3/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2018-01-05 17:56                   ` Tiwei Bie
  2018-01-07  2:29                     ` Wang, Xiao W
  2018-01-07 12:05                   ` [dpdk-dev] [PATCH v6 0/3] " Xiao Wang
  1 sibling, 1 reply; 112+ messages in thread
From: Tiwei Bie @ 2018-01-05 17:56 UTC (permalink / raw)
  To: Xiao Wang; +Cc: dev, yliu, stephen

On Fri, Jan 05, 2018 at 08:46:57AM -0800, Xiao Wang wrote:
[...]
> +static int
> +make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
> +{
> +	struct ether_hdr *eth_hdr;
> +	struct arp_hdr  *rarp;

Please just use one space between the type and var instead of two.

> +
[...]
> +static void
> +virtio_notify_peers(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
> +	struct rte_mbuf *rarp_mbuf;
> +
> +	rarp_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);

It's not necessary to use rte_mbuf_raw_alloc() here, and
you forgot to initialize the allocated mbuf. I think you
can use rte_pktmbuf_alloc() directly, as I showed in
the example in my previous mail.

> +	if (rarp_mbuf == NULL) {
> +		PMD_DRV_LOG(ERR, "first mbuf allocate free_bufed");

Typos:
first?
free_bufed?

> +		return;
> +	}
[...]
> +static void
> +virtio_ack_link_announce(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +	struct virtio_pmd_ctrl ctrl;
> +	int len;
> +
> +	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
> +	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
> +	len = 0;
> +
> +	virtio_send_command(hw->cvq, &ctrl, &len, 0);

If the last param is 0, then the third param could be NULL,
i.e. you don't need to define `len`.

> +}
> +
[...]
> diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
> index 4a2a2f0..04b6a37 100644
> --- a/drivers/net/virtio/virtio_ethdev.h
> +++ b/drivers/net/virtio/virtio_ethdev.h
> @@ -68,6 +68,7 @@
>  	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
>  	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
>  	 1u << VIRTIO_NET_F_MTU	| \
> +	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE  | \

Please use one space before '|' instead of two.

Thanks,
Tiwei

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v5 2/3] net/virtio: add packet injection method
  2018-01-05 16:46                 ` [dpdk-dev] [PATCH v5 2/3] net/virtio: add packet injection method Xiao Wang
@ 2018-01-05 18:00                   ` Tiwei Bie
  2018-01-07  2:37                     ` Wang, Xiao W
  0 siblings, 1 reply; 112+ messages in thread
From: Tiwei Bie @ 2018-01-05 18:00 UTC (permalink / raw)
  To: Xiao Wang; +Cc: dev, yliu, stephen

On Fri, Jan 05, 2018 at 08:46:56AM -0800, Xiao Wang wrote:
[...]
> +/*
> + * Recover hw state to let worker thread continue.
> + */
> +void
> +virtio_dev_resume(struct rte_eth_dev *dev)
> +{
> +	struct virtio_hw *hw = dev->data->dev_private;
> +
> +	hw->started = 1;
> +	rte_spinlock_unlock(&hw->state_lock);
> +}
> +
> +int
> +virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **buf, int count)
> +{

It would be better to name `buf` as tx_pkts and name
`count` as nb_pkts.

It would be better to add some comments to highlight
that the device needs to be paused before calling this
function in driver.

> +	struct virtio_hw *hw = dev->data->dev_private;
> +	struct virtnet_tx *txvq = dev->data->tx_queues[0];
> +	int ret;
[...]
> diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
> index 2039bc5..4a2a2f0 100644
> --- a/drivers/net/virtio/virtio_ethdev.h
> +++ b/drivers/net/virtio/virtio_ethdev.h
> @@ -37,6 +37,7 @@
>  #include <stdint.h>
>  
>  #include "virtio_pci.h"
> +#include "virtio_rxtx.h"

It's not necessary to include this header file.

>  
>  #define SPEED_10	10
>  #define SPEED_100	100
> @@ -121,4 +122,9 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
>  
>  void virtio_interrupt_handler(void *param);
>  
> +int virtio_dev_pause(struct rte_eth_dev *dev);
> +void virtio_dev_resume(struct rte_eth_dev *dev);
> +int virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **buf,
> +		int count);

Ditto.

> +
[...]
> diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> index 6a24fde..bbf5aaf 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -1017,7 +1017,7 @@
>  	uint16_t nb_used, nb_tx = 0;
>  	int error;
>  
> -	if (unlikely(hw->started == 0))
> +	if (unlikely(hw->started == 0) && tx_pkts != hw->inject_buf)

Why not just put all the condition checks in unlikely()?

If (hw->started == 0) is "unlikely", then
(hw->started == 0 && tx_pkts != hw->inject_buf) would
be more "unlikely".

>  		return nb_tx;
>  
>  	if (unlikely(nb_pkts < 1))
> diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
> index b5bc1c4..d81d162 100644
> --- a/drivers/net/virtio/virtio_rxtx_simple.c
> +++ b/drivers/net/virtio/virtio_rxtx_simple.c
> @@ -99,7 +99,7 @@ int __attribute__((cold))
>  	uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1;
>  	uint16_t nb_tx = 0;
>  
> -	if (unlikely(hw->started == 0))
> +	if (unlikely(hw->started == 0) && tx_pkts != hw->inject_buf)

Ditto.

Thanks,
Tiwei

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/3] net/virtio: add packet injection method
  2018-01-04 15:59             ` [dpdk-dev] [PATCH v4 2/3] net/virtio: add packet injection method Xiao Wang
  2018-01-04  7:56               ` Tiwei Bie
  2018-01-05 16:46               ` [dpdk-dev] [PATCH v5 0/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2018-01-05 20:27               ` Stephen Hemminger
  2018-01-06  4:41                 ` Tiwei Bie
  2 siblings, 1 reply; 112+ messages in thread
From: Stephen Hemminger @ 2018-01-05 20:27 UTC (permalink / raw)
  To: Xiao Wang; +Cc: tiwei.bie, dev, yliu

On Thu,  4 Jan 2018 07:59:37 -0800
Xiao Wang <xiao.w.wang@intel.com> wrote:

> This patch adds dev_pause, dev_resume and inject_pkts api to allow
> driver to pause the worker thread and inject special packets into
> Tx queue. The next patch will be based on this.
> 
> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>

Why is this needed? It isn't obvious what the mechanism is trying to solve.

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/3] net/virtio: add packet injection method
  2018-01-05 20:27               ` [dpdk-dev] [PATCH v4 2/3] net/virtio: add packet injection method Stephen Hemminger
@ 2018-01-06  4:41                 ` Tiwei Bie
  0 siblings, 0 replies; 112+ messages in thread
From: Tiwei Bie @ 2018-01-06  4:41 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Xiao Wang, dev, yliu

On Fri, Jan 05, 2018 at 12:27:37PM -0800, Stephen Hemminger wrote:
> On Thu,  4 Jan 2018 07:59:37 -0800
> Xiao Wang <xiao.w.wang@intel.com> wrote:
> 
> > This patch adds dev_pause, dev_resume and inject_pkts api to allow
> > driver to pause the worker thread and inject special packets into
> > Tx queue. The next patch will be based on this.
> > 
> > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> 
> Why is this needed? It isn't obvious what the mechanism is trying to solve.

Xiao needs to use such mechanism to send some special packets
(RARP packet) in the interrupt handler to implement the GUEST
ANNOUNCE feature.

To avoid contention between the user's Tx threads and the
interrupt thread, he needs to pause the user's Tx threads (by
reusing the existing 'started' flag in `virtio_hw`) first,
and then call tx_burst() to send the RARP packet.

He already provided the pause() and resume() functions, but
the implementation of sending the RARP packet (adding a field
named `rarp_buf` to `virtio_hw`, and checking it in the tx_burst()
functions) is too specific. So I just suggested that he give
rarp_buf a more generic name and provide a simple wrapper around
tx_burst for internal use:

http://dpdk.org/ml/archives/dev/2018-January/085213.html

Is that OK with you? Or do you have any other suggestions? Thanks!

PS. The latest version is v5, below is the link:
http://dpdk.org/ml/archives/dev/2018-January/085354.html

Best regards,
Tiwei Bie

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/3] net/virtio: support GUEST ANNOUNCE
  2018-01-05 17:56                   ` Tiwei Bie
@ 2018-01-07  2:29                     ` Wang, Xiao W
  0 siblings, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-07  2:29 UTC (permalink / raw)
  To: Bie, Tiwei; +Cc: dev, yliu, stephen

Hi

> -----Original Message-----
> From: Bie, Tiwei
> Sent: Saturday, January 6, 2018 1:57 AM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: dev@dpdk.org; yliu@fridaylinux.org; stephen@networkplumber.org
> Subject: Re: [PATCH v5 3/3] net/virtio: support GUEST ANNOUNCE
> 
> On Fri, Jan 05, 2018 at 08:46:57AM -0800, Xiao Wang wrote:
> [...]
> > +static int
> > +make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr
> *mac)
> > +{
> > +	struct ether_hdr *eth_hdr;
> > +	struct arp_hdr  *rarp;
> 
> Please just use one space between the type and var instead of two.

Yes.

> 
> > +
> [...]
> > +static void
> > +virtio_notify_peers(struct rte_eth_dev *dev)
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
> > +	struct rte_mbuf *rarp_mbuf;
> > +
> > +	rarp_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
> 
> It's not necessary to use rte_mbuf_raw_alloc() here and
> you forgot to initialize the allocated mbuf. I think you
> can use rte_pktmbuf_alloc() directly as what I showed in
> the example in my previous mail.

You are right.

> 
> > +	if (rarp_mbuf == NULL) {
> > +		PMD_DRV_LOG(ERR, "first mbuf allocate free_bufed");
> 
> Typos:
> first?
> free_bufed?

Sorry for typo.

> 
> > +		return;
> > +	}
> [...]
> > +static void
> > +virtio_ack_link_announce(struct rte_eth_dev *dev)
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +	struct virtio_pmd_ctrl ctrl;
> > +	int len;
> > +
> > +	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
> > +	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
> > +	len = 0;
> > +
> > +	virtio_send_command(hw->cvq, &ctrl, &len, 0);
> 
> If the last param is 0, then the third param could be NULL,
> i.e. you don't need to define `len`.
> 

Just checked the code, when pkt_num is 0, len field won't be used.
Will make it NULL in v6.

> > +}
> > +
> [...]
> > diff --git a/drivers/net/virtio/virtio_ethdev.h
> b/drivers/net/virtio/virtio_ethdev.h
> > index 4a2a2f0..04b6a37 100644
> > --- a/drivers/net/virtio/virtio_ethdev.h
> > +++ b/drivers/net/virtio/virtio_ethdev.h
> > @@ -68,6 +68,7 @@
> >  	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
> >  	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
> >  	 1u << VIRTIO_NET_F_MTU	| \
> > +	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE  | \
> 
> Please use one space before '|' instead of two.

Yes, will keep it aligned with the above lines.

Thanks a lot,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v5 2/3] net/virtio: add packet injection method
  2018-01-05 18:00                   ` Tiwei Bie
@ 2018-01-07  2:37                     ` Wang, Xiao W
  0 siblings, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-07  2:37 UTC (permalink / raw)
  To: Bie, Tiwei; +Cc: dev, yliu, stephen

Hi,

> -----Original Message-----
> From: Bie, Tiwei
> Sent: Saturday, January 6, 2018 2:01 AM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: dev@dpdk.org; yliu@fridaylinux.org; stephen@networkplumber.org
> Subject: Re: [PATCH v5 2/3] net/virtio: add packet injection method
> 
> On Fri, Jan 05, 2018 at 08:46:56AM -0800, Xiao Wang wrote:
> [...]
> > +/*
> > + * Recover hw state to let worker thread continue.
> > + */
> > +void
> > +virtio_dev_resume(struct rte_eth_dev *dev)
> > +{
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +
> > +	hw->started = 1;
> > +	rte_spinlock_unlock(&hw->state_lock);
> > +}
> > +
> > +int
> > +virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **buf, int count)
> > +{
> 
> It would be better to name `buf` as tx_pkts and name
> `count` as nb_pkts.
> 
> It would be better to add some comments to highlight
> that the device needs to be paused before calling this
> function in driver.

Yes, making it aligned with the existing virtio_xmit_pkts looks better.
A highlight comment is helpful. Will add it in v6.

> 
> > +	struct virtio_hw *hw = dev->data->dev_private;
> > +	struct virtnet_tx *txvq = dev->data->tx_queues[0];
> > +	int ret;
> [...]
> > diff --git a/drivers/net/virtio/virtio_ethdev.h
> b/drivers/net/virtio/virtio_ethdev.h
> > index 2039bc5..4a2a2f0 100644
> > --- a/drivers/net/virtio/virtio_ethdev.h
> > +++ b/drivers/net/virtio/virtio_ethdev.h
> > @@ -37,6 +37,7 @@
> >  #include <stdint.h>
> >
> >  #include "virtio_pci.h"
> > +#include "virtio_rxtx.h"
> 
> It's not necessary to include this header file.

Yes, it should be removed since I have removed the txvq parameter in virtio_inject_pkts.
> 
> >
> >  #define SPEED_10	10
> >  #define SPEED_100	100
> > @@ -121,4 +122,9 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue,
> struct rte_mbuf **tx_pkts,
> >
> >  void virtio_interrupt_handler(void *param);
> >
> > +int virtio_dev_pause(struct rte_eth_dev *dev);
> > +void virtio_dev_resume(struct rte_eth_dev *dev);
> > +int virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **buf,
> > +		int count);
> 
> Ditto.
> 
> > +
> [...]
> > diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> > index 6a24fde..bbf5aaf 100644
> > --- a/drivers/net/virtio/virtio_rxtx.c
> > +++ b/drivers/net/virtio/virtio_rxtx.c
> > @@ -1017,7 +1017,7 @@
> >  	uint16_t nb_used, nb_tx = 0;
> >  	int error;
> >
> > -	if (unlikely(hw->started == 0))
> > +	if (unlikely(hw->started == 0) && tx_pkts != hw->inject_buf)
> 
> Why not just put all the condition checks in unlikely()?
> 
> If (hw->started == 0) is "unlikely", then
> (hw->started == 0 && tx_pkts != hw->inject_buf) would
> be more "unlikely".

Your way could ensure that datapath perf is not affected.
Will change it in v6.

Thanks a lot,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v6 0/3] net/virtio: support GUEST ANNOUNCE
  2018-01-05 16:46                 ` [dpdk-dev] [PATCH v5 3/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-05 17:56                   ` Tiwei Bie
@ 2018-01-07 12:05                   ` Xiao Wang
  2018-01-07 12:05                     ` [dpdk-dev] [PATCH v6 1/3] net/virtio: make control queue thread-safe Xiao Wang
                                       ` (2 more replies)
  1 sibling, 3 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-07 12:05 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

v6:
- Use rte_pktmbuf_alloc() instead of rte_mbuf_raw_alloc().
- Remove the 'len' parameter in calling virtio_send_command().
- Remove extra space between type and var.
- Improve comment and alignment.
- Remove the unnecessary header file.
- A better usage of 'unlikely' indication.

v5:
- Remove txvq parameter in virtio_inject_pkts.
- Zero hw->special_buf after using it.
- Return the retval of tx_pkt_burst().
- Allocate a mbuf pointer on stack directly.

v4:
- Move spinlock lock/unlock into dev_pause/resume.
- Separate out a patch for packet injection.

v3:
- Remove Tx function code duplication, use a special pointer for rarp
  injection.
- Rename function generate_rarp to virtio_notify_peers, replace
  'virtnet_' with 'virtio_'.
- Add comment for state_lock.
- Typo fix and comment improvement.

v2:
- Use spaces instead of tabs between the code and comments.
- Remove unnecessary parentheses.
- Use rte_pktmbuf_mtod directly to get eth_hdr addr.
- Fix virtio_dev_pause return value check.

Xiao Wang (3):
  net/virtio: make control queue thread-safe
  net/virtio: add packet injection method
  net/virtio: support GUEST ANNOUNCE

 drivers/net/virtio/virtio_ethdev.c      | 158 +++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h      |   6 ++
 drivers/net/virtio/virtio_pci.h         |   7 ++
 drivers/net/virtio/virtio_rxtx.c        |   3 +-
 drivers/net/virtio/virtio_rxtx.h        |   1 +
 drivers/net/virtio/virtio_rxtx_simple.c |   2 +-
 drivers/net/virtio/virtqueue.h          |  11 +++
 7 files changed, 183 insertions(+), 5 deletions(-)

-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v6 1/3] net/virtio: make control queue thread-safe
  2018-01-07 12:05                   ` [dpdk-dev] [PATCH v6 0/3] " Xiao Wang
@ 2018-01-07 12:05                     ` Xiao Wang
  2018-01-08 13:06                       ` Yuanhan Liu
  2018-01-07 12:05                     ` [dpdk-dev] [PATCH v6 2/3] net/virtio: add packet injection method Xiao Wang
  2018-01-07 12:05                     ` [dpdk-dev] [PATCH v6 3/3] " Xiao Wang
  2 siblings, 1 reply; 112+ messages in thread
From: Xiao Wang @ 2018-01-07 12:05 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

The virtio_send_command function may be called from app's configuration
routine, but also from an interrupt handler called when live migration is
done on the backup side. So this patch makes control queue thread-safe
first.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 7 ++++++-
 drivers/net/virtio/virtio_rxtx.c   | 1 +
 drivers/net/virtio/virtio_rxtx.h   | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index e0328f61d..ac739506e 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -177,6 +177,8 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
 		return -1;
 	}
+
+	rte_spinlock_lock(&cvq->sl);
 	vq = cvq->vq;
 	head = vq->vq_desc_head_idx;
 
@@ -184,8 +186,10 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 		"vq->hw->cvq = %p vq = %p",
 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
 
-	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
+	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
+		rte_spinlock_unlock(&cvq->sl);
 		return -1;
+	}
 
 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
 		sizeof(struct virtio_pmd_ctrl));
@@ -261,6 +265,7 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 
 	result = cvq->virtio_net_hdr_mz->addr;
 
+	rte_spinlock_unlock(&cvq->sl);
 	return result->status;
 }
 
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 390c137c8..6a24fdecb 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -407,6 +407,7 @@ virtio_dev_cq_start(struct rte_eth_dev *dev)
 	struct virtio_hw *hw = dev->data->dev_private;
 
 	if (hw->cvq && hw->cvq->vq) {
+		rte_spinlock_init(&hw->cvq->sl);
 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
 	}
 }
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 54f1e849b..71b5798b0 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -84,6 +84,7 @@ struct virtnet_ctl {
 	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
 	uint16_t port_id;               /**< Device port identifier. */
 	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring. */
+	rte_spinlock_t sl;              /**< spinlock for control queue. */
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v6 2/3] net/virtio: add packet injection method
  2018-01-07 12:05                   ` [dpdk-dev] [PATCH v6 0/3] " Xiao Wang
  2018-01-07 12:05                     ` [dpdk-dev] [PATCH v6 1/3] net/virtio: make control queue thread-safe Xiao Wang
@ 2018-01-07 12:05                     ` Xiao Wang
  2018-01-08 13:03                       ` Yuanhan Liu
  2018-01-09 14:26                       ` [dpdk-dev] [PATCH v7 0/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-07 12:05                     ` [dpdk-dev] [PATCH v6 3/3] " Xiao Wang
  2 siblings, 2 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-07 12:05 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

This patch adds dev_pause, dev_resume and inject_pkts api to allow
driver to pause the worker threads and inject special packets into
Tx queue. The next patch will be based on this.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c      | 56 +++++++++++++++++++++++++++++++++
 drivers/net/virtio/virtio_ethdev.h      |  5 +++
 drivers/net/virtio/virtio_pci.h         |  7 +++++
 drivers/net/virtio/virtio_rxtx.c        |  2 +-
 drivers/net/virtio/virtio_rxtx_simple.c |  2 +-
 5 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index ac739506e..8f64220b0 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -55,6 +55,7 @@
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_dev.h>
+#include <rte_cycles.h>
 
 #include "virtio_ethdev.h"
 #include "virtio_pci.h"
@@ -1249,6 +1250,57 @@ virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
 	return 0;
 }
 
+int
+virtio_dev_pause(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	rte_spinlock_lock(&hw->state_lock);
+
+	if (hw->started == 0) {
+		/* Device is just stopped. */
+		rte_spinlock_unlock(&hw->state_lock);
+		return -1;
+	}
+	hw->started = 0;
+	/*
+	 * Prevent the worker threads from touching queues to avoid contention,
+	 * 1 ms should be enough for the ongoing Tx function to finish.
+	 */
+	rte_delay_ms(1);
+	return 0;
+}
+
+/*
+ * Recover hw state to let the worker threads continue.
+ */
+void
+virtio_dev_resume(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	hw->started = 1;
+	rte_spinlock_unlock(&hw->state_lock);
+}
+
+/*
+ * Should be called only after device is paused.
+ */
+int
+virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
+		int nb_pkts)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_tx *txvq = dev->data->tx_queues[0];
+	int ret;
+
+	hw->inject_pkts = tx_pkts;
+	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
+	hw->inject_pkts = NULL;
+
+	return ret;
+}
+
 /*
  * Process Virtio Config changed interrupt and call the callback
  * if link state changed.
@@ -1786,6 +1838,8 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 			return -EBUSY;
 		}
 
+	rte_spinlock_init(&hw->state_lock);
+
 	hw->use_simple_rx = 1;
 	hw->use_simple_tx = 1;
 
@@ -1952,12 +2006,14 @@ virtio_dev_stop(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
+	rte_spinlock_lock(&hw->state_lock);
 	if (intr_conf->lsc || intr_conf->rxq)
 		virtio_intr_disable(dev);
 
 	hw->started = 0;
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
+	rte_spinlock_unlock(&hw->state_lock);
 }
 
 static int
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 2039bc547..6b0c4f9af 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -121,4 +121,9 @@ int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
 
 void virtio_interrupt_handler(void *param);
 
+int virtio_dev_pause(struct rte_eth_dev *dev);
+void virtio_dev_resume(struct rte_eth_dev *dev);
+int virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
+		int nb_pkts);
+
 #endif /* _VIRTIO_ETHDEV_H_ */
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 3c5ce66ce..e5099d815 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -270,6 +270,13 @@ struct virtio_hw {
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	void	    *virtio_user_dev;
+	/*
+	 * App management thread and virtio interrupt handler thread
+	 * both can change the 'started' flag, this lock is meant to
+	 * avoid such a contention.
+	 */
+	rte_spinlock_t state_lock;
+	struct rte_mbuf **inject_pkts;
 
 	struct virtqueue **vqs;
 };
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 6a24fdecb..5ab14b9ca 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -1017,7 +1017,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	uint16_t nb_used, nb_tx = 0;
 	int error;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
 		return nb_tx;
 
 	if (unlikely(nb_pkts < 1))
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index b5bc1c49f..960d51d8e 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -99,7 +99,7 @@ virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1;
 	uint16_t nb_tx = 0;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
 		return nb_tx;
 
 	nb_used = VIRTQUEUE_NUSED(vq);
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v6 3/3] net/virtio: support GUEST ANNOUNCE
  2018-01-07 12:05                   ` [dpdk-dev] [PATCH v6 0/3] " Xiao Wang
  2018-01-07 12:05                     ` [dpdk-dev] [PATCH v6 1/3] net/virtio: make control queue thread-safe Xiao Wang
  2018-01-07 12:05                     ` [dpdk-dev] [PATCH v6 2/3] net/virtio: add packet injection method Xiao Wang
@ 2018-01-07 12:05                     ` Xiao Wang
  2 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-07 12:05 UTC (permalink / raw)
  To: tiwei.bie; +Cc: dev, yliu, stephen, Xiao Wang

When live migration is done, for the backup VM, either the virtio
frontend or the vhost backend needs to send out gratuitous RARP packet
to announce its new network location.

This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
migration scenario where the vhost backend doesn't have the ability to
generate RARP packet.

Brief introduction of the work flow:
1. QEMU finishes live migration, pokes the backup VM with an interrupt.
2. Virtio interrupt handler reads out the interrupt status value, and
   realizes it needs to send out RARP packet to announce its location.
3. Pause device to stop worker thread touching the queues.
4. Inject a RARP packet into a Tx Queue.
5. Ack the interrupt via control queue.
6. Resume device to continue packet processing.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 95 +++++++++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h |  1 +
 drivers/net/virtio/virtqueue.h     | 11 +++++
 3 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 8f64220b0..4174da186 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -48,6 +48,8 @@
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_arp.h>
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_cpuflags.h>
@@ -107,6 +109,11 @@ static int virtio_dev_queue_stats_mapping_set(
 	uint8_t stat_idx,
 	uint8_t is_rx);
 
+static int make_rarp_packet(struct rte_mbuf *rarp_mbuf,
+		const struct ether_addr *mac);
+static void virtio_notify_peers(struct rte_eth_dev *dev);
+static void virtio_ack_link_announce(struct rte_eth_dev *dev);
+
 /*
  * The set of PCI devices this driver supports
  */
@@ -1301,9 +1308,89 @@ virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
 	return ret;
 }
 
+#define RARP_PKT_SIZE	64
+static int
+make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
+{
+	struct ether_hdr *eth_hdr;
+	struct arp_hdr *rarp;
+
+	if (rarp_mbuf->buf_len < RARP_PKT_SIZE) {
+		PMD_DRV_LOG(ERR, "mbuf size too small %u (< %d)",
+				rarp_mbuf->buf_len, RARP_PKT_SIZE);
+		return -1;
+	}
+
+	/* Ethernet header. */
+	eth_hdr = rte_pktmbuf_mtod(rarp_mbuf, struct ether_hdr *);
+	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
+	ether_addr_copy(mac, &eth_hdr->s_addr);
+	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
+
+	/* RARP header. */
+	rarp = (struct arp_hdr *)(eth_hdr + 1);
+	rarp->arp_hrd = htons(ARP_HRD_ETHER);
+	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
+	rarp->arp_hln = ETHER_ADDR_LEN;
+	rarp->arp_pln = 4;
+	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
+
+	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
+	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
+	memset(&rarp->arp_data.arp_sip, 0x00, 4);
+	memset(&rarp->arp_data.arp_tip, 0x00, 4);
+
+	rarp_mbuf->data_len = RARP_PKT_SIZE;
+	rarp_mbuf->pkt_len = RARP_PKT_SIZE;
+
+	return 0;
+}
+
+static void
+virtio_notify_peers(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
+	struct rte_mbuf *rarp_mbuf;
+
+	rarp_mbuf = rte_pktmbuf_alloc(rxvq->mpool);
+	if (rarp_mbuf == NULL) {
+		PMD_DRV_LOG(ERR, "mbuf allocate failed");
+		return;
+	}
+
+	if (make_rarp_packet(rarp_mbuf,
+			(struct ether_addr *)hw->mac_addr) < 0) {
+		rte_pktmbuf_free(rarp_mbuf);
+		return;
+	}
+
+	/* If virtio port just stopped, no need to send RARP */
+	if (virtio_dev_pause(dev) < 0) {
+		rte_pktmbuf_free(rarp_mbuf);
+		return;
+	}
+
+	virtio_inject_pkts(dev, &rarp_mbuf, 1);
+	virtio_dev_resume(dev);
+}
+
+static void
+virtio_ack_link_announce(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
+
+	virtio_send_command(hw->cvq, &ctrl, NULL, 0);
+}
+
 /*
- * Process Virtio Config changed interrupt and call the callback
- * if link state changed.
+ * Process virtio config changed interrupt. Call the callback
+ * if link state changed, generate gratuitous RARP packet if
+ * the status indicates an ANNOUNCE.
  */
 void
 virtio_interrupt_handler(void *param)
@@ -1326,6 +1413,10 @@ virtio_interrupt_handler(void *param)
 						      NULL, NULL);
 	}
 
+	if (isr & VIRTIO_NET_S_ANNOUNCE) {
+		virtio_notify_peers(dev);
+		virtio_ack_link_announce(dev);
+	}
 }
 
 /* set rx and tx handlers according to what is supported */
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 6b0c4f9af..5bc5d9385 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -67,6 +67,7 @@
 	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
 	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
 	 1u << VIRTIO_NET_F_MTU	| \
+	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |	\
 	 1u << VIRTIO_RING_F_INDIRECT_DESC |    \
 	 1ULL << VIRTIO_F_VERSION_1       |	\
 	 1ULL << VIRTIO_F_IOMMU_PLATFORM)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 2305d91a4..d9045e156 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -158,6 +158,17 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 #define VIRTIO_NET_CTRL_VLAN_DEL 1
 
+/*
+ * Control link announce acknowledgement
+ *
+ * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
+ * driver has received the notification; device would clear the
+ * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
+ * this command.
+ */
+#define VIRTIO_NET_CTRL_ANNOUNCE     3
+#define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
+
 struct virtio_net_ctrl_hdr {
 	uint8_t class;
 	uint8_t cmd;
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v6 2/3] net/virtio: add packet injection method
  2018-01-07 12:05                     ` [dpdk-dev] [PATCH v6 2/3] net/virtio: add packet injection method Xiao Wang
@ 2018-01-08 13:03                       ` Yuanhan Liu
  2018-01-08 15:11                         ` Wang, Xiao W
  2018-01-09 14:26                       ` [dpdk-dev] [PATCH v7 0/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  1 sibling, 1 reply; 112+ messages in thread
From: Yuanhan Liu @ 2018-01-08 13:03 UTC (permalink / raw)
  To: Xiao Wang; +Cc: tiwei.bie, dev, stephen

On Sun, Jan 07, 2018 at 04:05:12AM -0800, Xiao Wang wrote:
> +	/*
> +	 * App management thread and virtio interrupt handler thread
> +	 * both can change the 'started' flag, this lock is meant to
> +	 * avoid such a contention.
> +	 */
> +	rte_spinlock_t state_lock;

Why not turning the "started" to atomic type, so that you don't need
the lock?

	--yliu
> +	struct rte_mbuf **inject_pkts;
>  
>  	struct virtqueue **vqs;
>  };

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/3] net/virtio: make control queue thread-safe
  2018-01-07 12:05                     ` [dpdk-dev] [PATCH v6 1/3] net/virtio: make control queue thread-safe Xiao Wang
@ 2018-01-08 13:06                       ` Yuanhan Liu
  2018-01-08 15:25                         ` Wang, Xiao W
  0 siblings, 1 reply; 112+ messages in thread
From: Yuanhan Liu @ 2018-01-08 13:06 UTC (permalink / raw)
  To: Xiao Wang; +Cc: tiwei.bie, dev, stephen

On Sun, Jan 07, 2018 at 04:05:11AM -0800, Xiao Wang wrote:
> diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
> index 54f1e849b..71b5798b0 100644
> --- a/drivers/net/virtio/virtio_rxtx.h
> +++ b/drivers/net/virtio/virtio_rxtx.h
> @@ -84,6 +84,7 @@ struct virtnet_ctl {
>  	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
>  	uint16_t port_id;               /**< Device port identifier. */
>  	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring. */
> +	rte_spinlock_t sl;              /**< spinlock for control queue. */

It's weird to name it "sl". The typical naming is just "lock".

	--yliu

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v6 2/3] net/virtio: add packet injection method
  2018-01-08 13:03                       ` Yuanhan Liu
@ 2018-01-08 15:11                         ` Wang, Xiao W
  2018-01-09  2:55                           ` Wang, Xiao W
  0 siblings, 1 reply; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-08 15:11 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: Bie, Tiwei, dev, stephen



> -----Original Message-----
> From: Yuanhan Liu [mailto:yliu@fridaylinux.org]
> Sent: Monday, January 8, 2018 9:04 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: Bie, Tiwei <tiwei.bie@intel.com>; dev@dpdk.org;
> stephen@networkplumber.org
> Subject: Re: [PATCH v6 2/3] net/virtio: add packet injection method
> 
> On Sun, Jan 07, 2018 at 04:05:12AM -0800, Xiao Wang wrote:
> > +	/*
> > +	 * App management thread and virtio interrupt handler thread
> > +	 * both can change the 'started' flag, this lock is meant to
> > +	 * avoid such a contention.
> > +	 */
> > +	rte_spinlock_t state_lock;
> 
> Why not turning the "started" to atomic type, so that you don't need
> the lock?
> 
> 	--yliu

To avoid impacting datapath performance, this patch doesn't change "started" to atomic 
type.

During the interrupt handler routine, there are a series of instructions between lock acquire and release. An atomic value is not suitable for this scenario.

BRs,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v6 1/3] net/virtio: make control queue thread-safe
  2018-01-08 13:06                       ` Yuanhan Liu
@ 2018-01-08 15:25                         ` Wang, Xiao W
  0 siblings, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-08 15:25 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: Bie, Tiwei, dev, stephen

Hi,

> -----Original Message-----
> From: Yuanhan Liu [mailto:yliu@fridaylinux.org]
> Sent: Monday, January 8, 2018 9:07 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: Bie, Tiwei <tiwei.bie@intel.com>; dev@dpdk.org;
> stephen@networkplumber.org
> Subject: Re: [PATCH v6 1/3] net/virtio: make control queue thread-safe
> 
> On Sun, Jan 07, 2018 at 04:05:11AM -0800, Xiao Wang wrote:
> > diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
> > index 54f1e849b..71b5798b0 100644
> > --- a/drivers/net/virtio/virtio_rxtx.h
> > +++ b/drivers/net/virtio/virtio_rxtx.h
> > @@ -84,6 +84,7 @@ struct virtnet_ctl {
> >  	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
> >  	uint16_t port_id;               /**< Device port identifier. */
> >  	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring.
> */
> > +	rte_spinlock_t sl;              /**< spinlock for control queue. */
> 
> It's weird to name it "sl". The typical naming is just "lock".
> 
> 	--yliu

I'm open to the naming method, but you can see that:
struct rte_mempool_ops_table also has a "sl" field.

BRs,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v6 2/3] net/virtio: add packet injection method
  2018-01-08 15:11                         ` Wang, Xiao W
@ 2018-01-09  2:55                           ` Wang, Xiao W
  0 siblings, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-09  2:55 UTC (permalink / raw)
  To: 'Yuanhan Liu'
  Cc: Bie, Tiwei, 'dev@dpdk.org', 'stephen@networkplumber.org'



> -----Original Message-----
> From: Wang, Xiao W
> Sent: Monday, January 8, 2018 11:12 PM
> To: Yuanhan Liu <yliu@fridaylinux.org>
> Cc: Bie, Tiwei <tiwei.bie@intel.com>; dev@dpdk.org;
> stephen@networkplumber.org
> Subject: RE: [PATCH v6 2/3] net/virtio: add packet injection method
> 
> 
> 
> > -----Original Message-----
> > From: Yuanhan Liu [mailto:yliu@fridaylinux.org]
> > Sent: Monday, January 8, 2018 9:04 PM
> > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > Cc: Bie, Tiwei <tiwei.bie@intel.com>; dev@dpdk.org;
> > stephen@networkplumber.org
> > Subject: Re: [PATCH v6 2/3] net/virtio: add packet injection method
> >
> > On Sun, Jan 07, 2018 at 04:05:12AM -0800, Xiao Wang wrote:
> > > +	/*
> > > +	 * App management thread and virtio interrupt handler thread
> > > +	 * both can change the 'started' flag, this lock is meant to
> > > +	 * avoid such a contention.
> > > +	 */
> > > +	rte_spinlock_t state_lock;
> >
> > Why not turning the "started" to atomic type, so that you don't need
> > the lock?
> >

> During the interrupt handler routine, there are a series of instructions
> between lock acquire and release. An atomic value is not suitable for this
> scenario.
> 

The current comment may not explain the state_lock correctly: this lock needs to be acquired in dev_pause and released in dev_resume, so it's not just used to protect the "started" value.

I would improve the comment to read "App management thread and virtio interrupt handler thread both can change device state, ..."

Thanks for comments,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v7 3/3] net/virtio: support GUEST ANNOUNCE
  2018-01-09 14:26                         ` [dpdk-dev] [PATCH v7 3/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2018-01-09  8:49                           ` Maxime Coquelin
  2018-01-09 10:58                             ` Wang, Xiao W
  2018-01-09 11:03                             ` Wang, Xiao W
  2018-01-09 13:26                           ` [dpdk-dev] [PATCH v8 0/5] " Xiao Wang
  1 sibling, 2 replies; 112+ messages in thread
From: Maxime Coquelin @ 2018-01-09  8:49 UTC (permalink / raw)
  To: Xiao Wang, yliu; +Cc: tiwei.bie, dev, stephen



On 01/09/2018 03:26 PM, Xiao Wang wrote:
> When live migration is done, for the backup VM, either the virtio
> frontend or the vhost backend needs to send out gratuitous RARP packet
> to announce its new network location.
> 
> This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
> migration scenario where the vhost backend doesn't have the ability to
> generate RARP packet.
> 
> Brief introduction of the work flow:
> 1. QEMU finishes live migration, pokes the backup VM with an interrupt.
> 2. Virtio interrupt handler reads out the interrupt status value, and
>     realizes it needs to send out RARP packet to announce its location.
> 3. Pause device to stop worker thread touching the queues.
> 4. Inject a RARP packet into a Tx Queue.
> 5. Ack the interrupt via control queue.
> 6. Resume device to continue packet processing.
> 
> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> ---
>   drivers/net/virtio/virtio_ethdev.c | 95 +++++++++++++++++++++++++++++++++++++-
>   drivers/net/virtio/virtio_ethdev.h |  1 +
>   drivers/net/virtio/virtqueue.h     | 11 +++++
>   3 files changed, 105 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> index e8ff1e449..9606df514 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -19,6 +19,8 @@
>   #include <rte_pci.h>
>   #include <rte_bus_pci.h>
>   #include <rte_ether.h>
> +#include <rte_ip.h>
> +#include <rte_arp.h>
>   #include <rte_common.h>
>   #include <rte_errno.h>
>   #include <rte_cpuflags.h>
> @@ -78,6 +80,11 @@ static int virtio_dev_queue_stats_mapping_set(
>   	uint8_t stat_idx,
>   	uint8_t is_rx);
>   
> +static int make_rarp_packet(struct rte_mbuf *rarp_mbuf,
> +		const struct ether_addr *mac);
> +static void virtio_notify_peers(struct rte_eth_dev *dev);
> +static void virtio_ack_link_announce(struct rte_eth_dev *dev);
> +
>   /*
>    * The set of PCI devices this driver supports
>    */
> @@ -1272,9 +1279,89 @@ virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
>   	return ret;
>   }
>   
> +#define RARP_PKT_SIZE	64
> +static int
> +make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
> +{
> +	struct ether_hdr *eth_hdr;
> +	struct arp_hdr *rarp;
> +
> +	if (rarp_mbuf->buf_len < RARP_PKT_SIZE) {
> +		PMD_DRV_LOG(ERR, "mbuf size too small %u (< %d)",
> +				rarp_mbuf->buf_len, RARP_PKT_SIZE);
> +		return -1;
> +	}
> +
> +	/* Ethernet header. */
> +	eth_hdr = rte_pktmbuf_mtod(rarp_mbuf, struct ether_hdr *);
> +	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
> +	ether_addr_copy(mac, &eth_hdr->s_addr);
> +	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
> +
> +	/* RARP header. */
> +	rarp = (struct arp_hdr *)(eth_hdr + 1);
> +	rarp->arp_hrd = htons(ARP_HRD_ETHER);
> +	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
> +	rarp->arp_hln = ETHER_ADDR_LEN;
> +	rarp->arp_pln = 4;
> +	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
> +
> +	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
> +	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
> +	memset(&rarp->arp_data.arp_sip, 0x00, 4);
> +	memset(&rarp->arp_data.arp_tip, 0x00, 4);
> +
> +	rarp_mbuf->data_len = RARP_PKT_SIZE;
> +	rarp_mbuf->pkt_len = RARP_PKT_SIZE;
> +
> +	return 0;
> +}

Do you think it could make sense to have this function in a lib, as
vhost user lib does exactly the same?

I don't know if it could be useful to others than vhost/virtio though.

Thanks,
Maxime

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v7 3/3] net/virtio: support GUEST ANNOUNCE
  2018-01-09  8:49                           ` Maxime Coquelin
@ 2018-01-09 10:58                             ` Wang, Xiao W
  2018-01-09 11:03                             ` Wang, Xiao W
  1 sibling, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-09 10:58 UTC (permalink / raw)
  To: Maxime Coquelin, yliu, thomas.monjalon; +Cc: Bie, Tiwei, dev, stephen

Hi,

> -----Original Message-----
> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> Sent: Tuesday, January 9, 2018 4:50 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>; yliu@fridaylinux.org
> Cc: Bie, Tiwei <tiwei.bie@intel.com>; dev@dpdk.org;
> stephen@networkplumber.org
> Subject: Re: [dpdk-dev] [PATCH v7 3/3] net/virtio: support GUEST ANNOUNCE
> 
> 
> 
> On 01/09/2018 03:26 PM, Xiao Wang wrote:
> > When live migration is done, for the backup VM, either the virtio
> > frontend or the vhost backend needs to send out gratuitous RARP packet
> > to announce its new network location.
> >
> > This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support
> live
> > migration scenario where the vhost backend doesn't have the ability to
> > generate RARP packet.
> >
> > Brief introduction of the work flow:
> > 1. QEMU finishes live migration, pokes the backup VM with an interrupt.
> > 2. Virtio interrupt handler reads out the interrupt status value, and
> >     realizes it needs to send out RARP packet to announce its location.
> > 3. Pause device to stop worker thread touching the queues.
> > 4. Inject a RARP packet into a Tx Queue.
> > 5. Ack the interrupt via control queue.
> > 6. Resume device to continue packet processing.
> >
> > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > ---
> >   drivers/net/virtio/virtio_ethdev.c | 95
> +++++++++++++++++++++++++++++++++++++-
> >   drivers/net/virtio/virtio_ethdev.h |  1 +
> >   drivers/net/virtio/virtqueue.h     | 11 +++++
> >   3 files changed, 105 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> > index e8ff1e449..9606df514 100644
> > --- a/drivers/net/virtio/virtio_ethdev.c
> > +++ b/drivers/net/virtio/virtio_ethdev.c
> > @@ -19,6 +19,8 @@
> >   #include <rte_pci.h>
> >   #include <rte_bus_pci.h>
> >   #include <rte_ether.h>
> > +#include <rte_ip.h>
> > +#include <rte_arp.h>
> >   #include <rte_common.h>
> >   #include <rte_errno.h>
> >   #include <rte_cpuflags.h>
> > @@ -78,6 +80,11 @@ static int virtio_dev_queue_stats_mapping_set(
> >   	uint8_t stat_idx,
> >   	uint8_t is_rx);
> >
> > +static int make_rarp_packet(struct rte_mbuf *rarp_mbuf,
> > +		const struct ether_addr *mac);
> > +static void virtio_notify_peers(struct rte_eth_dev *dev);
> > +static void virtio_ack_link_announce(struct rte_eth_dev *dev);
> > +
> >   /*
> >    * The set of PCI devices this driver supports
> >    */
> > @@ -1272,9 +1279,89 @@ virtio_inject_pkts(struct rte_eth_dev *dev, struct
> rte_mbuf **tx_pkts,
> >   	return ret;
> >   }
> >
> > +#define RARP_PKT_SIZE	64
> > +static int
> > +make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr
> *mac)
> > +{
> > +	struct ether_hdr *eth_hdr;
> > +	struct arp_hdr *rarp;
> > +
> > +	if (rarp_mbuf->buf_len < RARP_PKT_SIZE) {
> > +		PMD_DRV_LOG(ERR, "mbuf size too small %u (< %d)",
> > +				rarp_mbuf->buf_len, RARP_PKT_SIZE);
> > +		return -1;
> > +	}
> > +
> > +	/* Ethernet header. */
> > +	eth_hdr = rte_pktmbuf_mtod(rarp_mbuf, struct ether_hdr *);
> > +	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
> > +	ether_addr_copy(mac, &eth_hdr->s_addr);
> > +	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
> > +
> > +	/* RARP header. */
> > +	rarp = (struct arp_hdr *)(eth_hdr + 1);
> > +	rarp->arp_hrd = htons(ARP_HRD_ETHER);
> > +	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
> > +	rarp->arp_hln = ETHER_ADDR_LEN;
> > +	rarp->arp_pln = 4;
> > +	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
> > +
> > +	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
> > +	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
> > +	memset(&rarp->arp_data.arp_sip, 0x00, 4);
> > +	memset(&rarp->arp_data.arp_tip, 0x00, 4);
> > +
> > +	rarp_mbuf->data_len = RARP_PKT_SIZE;
> > +	rarp_mbuf->pkt_len = RARP_PKT_SIZE;
> > +
> > +	return 0;
> > +}
> 
> Do you think it could make sense to have this function in a lib, as
> vhost user lib does exactly the same?
> 
> I don't know if it could be useful to others than vhost/virtio though.
> 
> Thanks,
> Maxime

Hi Thomas,

Do you think it's worth adding a new helper for ARP in lib/librte_net/?
Currently we just need a helper to build RARP packet (the above make_rarp_packet)

BRs,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v7 3/3] net/virtio: support GUEST ANNOUNCE
  2018-01-09  8:49                           ` Maxime Coquelin
  2018-01-09 10:58                             ` Wang, Xiao W
@ 2018-01-09 11:03                             ` Wang, Xiao W
  2018-01-09 11:41                               ` Thomas Monjalon
  1 sibling, 1 reply; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-09 11:03 UTC (permalink / raw)
  To: thomas, Maxime Coquelin, yliu; +Cc: Bie, Tiwei, dev, stephen

> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> On 01/09/2018 03:26 PM, Xiao Wang wrote:
> > When live migration is done, for the backup VM, either the virtio
> > frontend or the vhost backend needs to send out gratuitous RARP packet
> > to announce its new network location.
> >
> > This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support
> live
> > migration scenario where the vhost backend doesn't have the ability to
> > generate RARP packet.
> >
> > Brief introduction of the work flow:
> > 1. QEMU finishes live migration, pokes the backup VM with an interrupt.
> > 2. Virtio interrupt handler reads out the interrupt status value, and
> >     realizes it needs to send out RARP packet to announce its location.
> > 3. Pause device to stop worker thread touching the queues.
> > 4. Inject a RARP packet into a Tx Queue.
> > 5. Ack the interrupt via control queue.
> > 6. Resume device to continue packet processing.
> >
> > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > ---
> >   drivers/net/virtio/virtio_ethdev.c | 95
> +++++++++++++++++++++++++++++++++++++-
> >   drivers/net/virtio/virtio_ethdev.h |  1 +
> >   drivers/net/virtio/virtqueue.h     | 11 +++++
> >   3 files changed, 105 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> > index e8ff1e449..9606df514 100644
> > --- a/drivers/net/virtio/virtio_ethdev.c
> > +++ b/drivers/net/virtio/virtio_ethdev.c
> > @@ -19,6 +19,8 @@
> >   #include <rte_pci.h>
> >   #include <rte_bus_pci.h>
> >   #include <rte_ether.h>
> > +#include <rte_ip.h>
> > +#include <rte_arp.h>
> >   #include <rte_common.h>
> >   #include <rte_errno.h>
> >   #include <rte_cpuflags.h>
> > @@ -78,6 +80,11 @@ static int virtio_dev_queue_stats_mapping_set(
> >   	uint8_t stat_idx,
> >   	uint8_t is_rx);
> >
> > +static int make_rarp_packet(struct rte_mbuf *rarp_mbuf,
> > +		const struct ether_addr *mac);
> > +static void virtio_notify_peers(struct rte_eth_dev *dev);
> > +static void virtio_ack_link_announce(struct rte_eth_dev *dev);
> > +
> >   /*
> >    * The set of PCI devices this driver supports
> >    */
> > @@ -1272,9 +1279,89 @@ virtio_inject_pkts(struct rte_eth_dev *dev, struct
> rte_mbuf **tx_pkts,
> >   	return ret;
> >   }
> >
> > +#define RARP_PKT_SIZE	64
> > +static int
> > +make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr
> *mac)
> > +{
> > +	struct ether_hdr *eth_hdr;
> > +	struct arp_hdr *rarp;
> > +
> > +	if (rarp_mbuf->buf_len < RARP_PKT_SIZE) {
> > +		PMD_DRV_LOG(ERR, "mbuf size too small %u (< %d)",
> > +				rarp_mbuf->buf_len, RARP_PKT_SIZE);
> > +		return -1;
> > +	}
> > +
> > +	/* Ethernet header. */
> > +	eth_hdr = rte_pktmbuf_mtod(rarp_mbuf, struct ether_hdr *);
> > +	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
> > +	ether_addr_copy(mac, &eth_hdr->s_addr);
> > +	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
> > +
> > +	/* RARP header. */
> > +	rarp = (struct arp_hdr *)(eth_hdr + 1);
> > +	rarp->arp_hrd = htons(ARP_HRD_ETHER);
> > +	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
> > +	rarp->arp_hln = ETHER_ADDR_LEN;
> > +	rarp->arp_pln = 4;
> > +	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
> > +
> > +	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
> > +	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
> > +	memset(&rarp->arp_data.arp_sip, 0x00, 4);
> > +	memset(&rarp->arp_data.arp_tip, 0x00, 4);
> > +
> > +	rarp_mbuf->data_len = RARP_PKT_SIZE;
> > +	rarp_mbuf->pkt_len = RARP_PKT_SIZE;
> > +
> > +	return 0;
> > +}
> 
> Do you think it could make sense to have this function in a lib, as
> vhost user lib does exactly the same?
> 
> I don't know if it could be useful to others than vhost/virtio though.

Hi Thomas,

Do you think it's worth adding a new helper for ARP in lib/librte_net/?
Currently we just need a helper to build RARP packet (the above make_rarp_packet)

BRs,
Xiao


> 
> Thanks,
> Maxime

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v7 3/3] net/virtio: support GUEST ANNOUNCE
  2018-01-09 11:03                             ` Wang, Xiao W
@ 2018-01-09 11:41                               ` Thomas Monjalon
  2018-01-09 13:36                                 ` Yuanhan Liu
  0 siblings, 1 reply; 112+ messages in thread
From: Thomas Monjalon @ 2018-01-09 11:41 UTC (permalink / raw)
  To: Wang, Xiao W; +Cc: Maxime Coquelin, yliu, Bie, Tiwei, dev, stephen

09/01/2018 12:03, Wang, Xiao W:
> > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > On 01/09/2018 03:26 PM, Xiao Wang wrote:
> > > +#define RARP_PKT_SIZE	64
> > > +static int
> > > +make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr
> > *mac)
> > > +{
> > > +	struct ether_hdr *eth_hdr;
> > > +	struct arp_hdr *rarp;
> > > +
> > > +	if (rarp_mbuf->buf_len < RARP_PKT_SIZE) {
> > > +		PMD_DRV_LOG(ERR, "mbuf size too small %u (< %d)",
> > > +				rarp_mbuf->buf_len, RARP_PKT_SIZE);
> > > +		return -1;
> > > +	}
> > > +
> > > +	/* Ethernet header. */
> > > +	eth_hdr = rte_pktmbuf_mtod(rarp_mbuf, struct ether_hdr *);
> > > +	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
> > > +	ether_addr_copy(mac, &eth_hdr->s_addr);
> > > +	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
> > > +
> > > +	/* RARP header. */
> > > +	rarp = (struct arp_hdr *)(eth_hdr + 1);
> > > +	rarp->arp_hrd = htons(ARP_HRD_ETHER);
> > > +	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
> > > +	rarp->arp_hln = ETHER_ADDR_LEN;
> > > +	rarp->arp_pln = 4;
> > > +	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
> > > +
> > > +	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
> > > +	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
> > > +	memset(&rarp->arp_data.arp_sip, 0x00, 4);
> > > +	memset(&rarp->arp_data.arp_tip, 0x00, 4);
> > > +
> > > +	rarp_mbuf->data_len = RARP_PKT_SIZE;
> > > +	rarp_mbuf->pkt_len = RARP_PKT_SIZE;
> > > +
> > > +	return 0;
> > > +}
> > 
> > Do you think it could make sense to have this function in a lib, as
> > vhost user lib does exactly the same?
> > 
> > I don't know if it could be useful to others than vhost/virtio though.
> 
> Hi Thomas,
> 
> Do you think it's worth adding a new helper for ARP in lib/librte_net/?
> Currently we just need a helper to build RARP packet (the above make_rarp_packet)

Yes, good idea

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v8 0/5] net/virtio: support GUEST ANNOUNCE
  2018-01-09 14:26                         ` [dpdk-dev] [PATCH v7 3/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-09  8:49                           ` Maxime Coquelin
@ 2018-01-09 13:26                           ` Xiao Wang
  2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 1/5] net/virtio: make control queue thread-safe Xiao Wang
                                               ` (5 more replies)
  1 sibling, 6 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 13:26 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, Xiao Wang

v8:
- Add a helper in lib/librte_net to make a RARP packet; it is used by
  both vhost and virtio.

v7:
- Improve comment for state_lock.
- Rename spinlock variable 'sl' to 'lock'.

v6:
- Use rte_pktmbuf_alloc() instead of rte_mbuf_raw_alloc().
- Remove the 'len' parameter in calling virtio_send_command().
- Remove extra space between type and var.
- Improve comment and alignment.
- Remove the unnecessary header file.
- A better usage of 'unlikely' indication.

v5:
- Remove txvq parameter in virtio_inject_pkts.
- Zero hw->special_buf after using it.
- Return the retval of tx_pkt_burst().
- Allocate a mbuf pointer on stack directly.

v4:
- Move spinlock lock/unlock into dev_pause/resume.
- Separate out a patch for packet injection.

v3:
- Remove Tx function code duplication, use a special pointer for rarp
  injection.
- Rename function generate_rarp to virtio_notify_peers, replace
  'virtnet_' with 'virtio_'.
- Add comment for state_lock.
- Typo fix and comment improvement.

v2:
- Use spaces instead of tabs between the code and comments.
- Remove unnecessary parentheses.
- Use rte_pktmbuf_mtod directly to get eth_hdr addr.
- Fix virtio_dev_pause return value check.

Xiao Wang (5):
  net/virtio: make control queue thread-safe
  net/virtio: add packet injection method
  net: add a helper for making RARP packet
  vhost: use lib API to make RARP packet
  net/virtio: support GUEST ANNOUNCE

 drivers/net/virtio/virtio_ethdev.c      | 118 +++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h      |   6 ++
 drivers/net/virtio/virtio_pci.h         |   7 ++
 drivers/net/virtio/virtio_rxtx.c        |   3 +-
 drivers/net/virtio/virtio_rxtx.h        |   1 +
 drivers/net/virtio/virtio_rxtx_simple.c |   2 +-
 drivers/net/virtio/virtqueue.h          |  11 +++
 lib/Makefile                            |   3 +-
 lib/librte_net/Makefile                 |   1 +
 lib/librte_net/rte_arp.c                |  42 ++++++++++++
 lib/librte_net/rte_arp.h                |  14 ++++
 lib/librte_net/rte_net_version.map      |   6 ++
 lib/librte_vhost/Makefile               |   2 +-
 lib/librte_vhost/virtio_net.c           |  41 +----------
 14 files changed, 210 insertions(+), 47 deletions(-)
 create mode 100644 lib/librte_net/rte_arp.c

-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v8 1/5] net/virtio: make control queue thread-safe
  2018-01-09 13:26                           ` [dpdk-dev] [PATCH v8 0/5] " Xiao Wang
@ 2018-01-09 13:26                             ` Xiao Wang
  2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 2/5] net/virtio: add packet injection method Xiao Wang
                                               ` (4 subsequent siblings)
  5 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 13:26 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, Xiao Wang

The virtio_send_command function may be called from the app's configuration
routine, but also from an interrupt handler that runs when live migration is
done on the backup side. So this patch makes the control queue thread-safe
first.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 7 ++++++-
 drivers/net/virtio/virtio_rxtx.c   | 1 +
 drivers/net/virtio/virtio_rxtx.h   | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 21f2131a9..4e613ce30 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -148,6 +148,8 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
 		return -1;
 	}
+
+	rte_spinlock_lock(&cvq->lock);
 	vq = cvq->vq;
 	head = vq->vq_desc_head_idx;
 
@@ -155,8 +157,10 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 		"vq->hw->cvq = %p vq = %p",
 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
 
-	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
+	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
+		rte_spinlock_unlock(&cvq->lock);
 		return -1;
+	}
 
 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
 		sizeof(struct virtio_pmd_ctrl));
@@ -232,6 +236,7 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 
 	result = cvq->virtio_net_hdr_mz->addr;
 
+	rte_spinlock_unlock(&cvq->lock);
 	return result->status;
 }
 
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 994458ced..265debf20 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -378,6 +378,7 @@ virtio_dev_cq_start(struct rte_eth_dev *dev)
 	struct virtio_hw *hw = dev->data->dev_private;
 
 	if (hw->cvq && hw->cvq->vq) {
+		rte_spinlock_init(&hw->cvq->lock);
 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
 	}
 }
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index ca546ccaa..49e9d98ee 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -55,6 +55,7 @@ struct virtnet_ctl {
 	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
 	uint16_t port_id;               /**< Device port identifier. */
 	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring. */
+	rte_spinlock_t lock;              /**< spinlock for control queue. */
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v8 2/5] net/virtio: add packet injection method
  2018-01-09 13:26                           ` [dpdk-dev] [PATCH v8 0/5] " Xiao Wang
  2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 1/5] net/virtio: make control queue thread-safe Xiao Wang
@ 2018-01-09 13:26                             ` Xiao Wang
  2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 3/5] net: add a helper for making RARP packet Xiao Wang
                                               ` (3 subsequent siblings)
  5 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 13:26 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, Xiao Wang

This patch adds dev_pause, dev_resume and inject_pkts APIs to allow the
driver to pause the worker threads and inject special packets into the
Tx queue. The next patch will be based on this.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c      | 56 +++++++++++++++++++++++++++++++++
 drivers/net/virtio/virtio_ethdev.h      |  5 +++
 drivers/net/virtio/virtio_pci.h         |  7 +++++
 drivers/net/virtio/virtio_rxtx.c        |  2 +-
 drivers/net/virtio/virtio_rxtx_simple.c |  2 +-
 5 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 4e613ce30..e8ff1e449 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -26,6 +26,7 @@
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_dev.h>
+#include <rte_cycles.h>
 
 #include "virtio_ethdev.h"
 #include "virtio_pci.h"
@@ -1220,6 +1221,57 @@ virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
 	return 0;
 }
 
+int
+virtio_dev_pause(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	rte_spinlock_lock(&hw->state_lock);
+
+	if (hw->started == 0) {
+		/* Device is just stopped. */
+		rte_spinlock_unlock(&hw->state_lock);
+		return -1;
+	}
+	hw->started = 0;
+	/*
+	 * Prevent the worker threads from touching queues to avoid contention,
+	 * 1 ms should be enough for the ongoing Tx function to finish.
+	 */
+	rte_delay_ms(1);
+	return 0;
+}
+
+/*
+ * Recover hw state to let the worker threads continue.
+ */
+void
+virtio_dev_resume(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	hw->started = 1;
+	rte_spinlock_unlock(&hw->state_lock);
+}
+
+/*
+ * Should be called only after device is paused.
+ */
+int
+virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
+		int nb_pkts)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_tx *txvq = dev->data->tx_queues[0];
+	int ret;
+
+	hw->inject_pkts = tx_pkts;
+	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
+	hw->inject_pkts = NULL;
+
+	return ret;
+}
+
 /*
  * Process Virtio Config changed interrupt and call the callback
  * if link state changed.
@@ -1757,6 +1809,8 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 			return -EBUSY;
 		}
 
+	rte_spinlock_init(&hw->state_lock);
+
 	hw->use_simple_rx = 1;
 	hw->use_simple_tx = 1;
 
@@ -1923,12 +1977,14 @@ virtio_dev_stop(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
+	rte_spinlock_lock(&hw->state_lock);
 	if (intr_conf->lsc || intr_conf->rxq)
 		virtio_intr_disable(dev);
 
 	hw->started = 0;
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
+	rte_spinlock_unlock(&hw->state_lock);
 }
 
 static int
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 765d249e6..69b30b7e1 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -92,4 +92,9 @@ int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
 
 void virtio_interrupt_handler(void *param);
 
+int virtio_dev_pause(struct rte_eth_dev *dev);
+void virtio_dev_resume(struct rte_eth_dev *dev);
+int virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
+		int nb_pkts);
+
 #endif /* _VIRTIO_ETHDEV_H_ */
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index fb1f6a9ec..9d810a599 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -241,6 +241,13 @@ struct virtio_hw {
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	void	    *virtio_user_dev;
+	/*
+	 * App management thread and virtio interrupt handler thread
+	 * both can change device state, this lock is meant to avoid
+	 * such a contention.
+	 */
+	rte_spinlock_t state_lock;
+	struct rte_mbuf **inject_pkts;
 
 	struct virtqueue **vqs;
 };
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 265debf20..80e996d06 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -988,7 +988,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	uint16_t nb_used, nb_tx = 0;
 	int error;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
 		return nb_tx;
 
 	if (unlikely(nb_pkts < 1))
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index 8ef3c0c04..98a9da5d8 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -70,7 +70,7 @@ virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1;
 	uint16_t nb_tx = 0;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
 		return nb_tx;
 
 	nb_used = VIRTQUEUE_NUSED(vq);
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v8 3/5] net: add a helper for making RARP packet
  2018-01-09 13:26                           ` [dpdk-dev] [PATCH v8 0/5] " Xiao Wang
  2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 1/5] net/virtio: make control queue thread-safe Xiao Wang
  2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 2/5] net/virtio: add packet injection method Xiao Wang
@ 2018-01-09 13:26                             ` Xiao Wang
  2018-01-09 13:48                               ` Thomas Monjalon
  2018-01-09 16:09                               ` [dpdk-dev] [PATCH v9 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 4/5] vhost: use lib API to make RARP packet Xiao Wang
                                               ` (2 subsequent siblings)
  5 siblings, 2 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 13:26 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, Xiao Wang

Suggested-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 lib/librte_net/Makefile            |  1 +
 lib/librte_net/rte_arp.c           | 42 ++++++++++++++++++++++++++++++++++++++
 lib/librte_net/rte_arp.h           | 14 +++++++++++++
 lib/librte_net/rte_net_version.map |  6 ++++++
 4 files changed, 63 insertions(+)
 create mode 100644 lib/librte_net/rte_arp.c

diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index 5e8a76b68..ab290c382 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -13,6 +13,7 @@ LIBABIVER := 1
 
 SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
 SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
+SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_arp.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_esp.h
diff --git a/lib/librte_net/rte_arp.c b/lib/librte_net/rte_arp.c
new file mode 100644
index 000000000..d7223b044
--- /dev/null
+++ b/lib/librte_net/rte_arp.c
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <arpa/inet.h>
+
+#include <rte_arp.h>
+
+#define RARP_PKT_SIZE	64
+int
+rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac)
+{
+	struct ether_hdr *eth_hdr;
+	struct arp_hdr *rarp;
+
+	if (mbuf->buf_len < RARP_PKT_SIZE)
+		return -1;
+
+	/* Ethernet header. */
+	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
+	ether_addr_copy(mac, &eth_hdr->s_addr);
+	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
+
+	/* RARP header. */
+	rarp = (struct arp_hdr *)(eth_hdr + 1);
+	rarp->arp_hrd = htons(ARP_HRD_ETHER);
+	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
+	rarp->arp_hln = ETHER_ADDR_LEN;
+	rarp->arp_pln = 4;
+	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
+
+	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
+	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
+	memset(&rarp->arp_data.arp_sip, 0x00, 4);
+	memset(&rarp->arp_data.arp_tip, 0x00, 4);
+
+	mbuf->data_len = RARP_PKT_SIZE;
+	mbuf->pkt_len = RARP_PKT_SIZE;
+
+	return 0;
+}
diff --git a/lib/librte_net/rte_arp.h b/lib/librte_net/rte_arp.h
index 183641874..375635967 100644
--- a/lib/librte_net/rte_arp.h
+++ b/lib/librte_net/rte_arp.h
@@ -76,6 +76,20 @@ struct arp_hdr {
 	struct arp_ipv4 arp_data;
 } __attribute__((__packed__));
 
+/**
+ * Make a RARP packet based on MAC addr.
+ *
+ * @param mbuf
+ *   Pointer to the rte_mbuf structure
+ * @param mac
+ *   Pointer to the MAC addr
+ *
+ * @return
+ *   - 0 on success, negative on error
+ */
+int
+rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map
index 687c40eaf..c28856c73 100644
--- a/lib/librte_net/rte_net_version.map
+++ b/lib/librte_net/rte_net_version.map
@@ -12,3 +12,9 @@ DPDK_17.05 {
 	rte_net_crc_set_alg;
 
 } DPDK_16.11;
+
+DPDK_18.02 {
+	global:
+
+	rte_net_make_rarp_packet;
+} DPDK_17.05;
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v8 4/5] vhost: use lib API to make RARP packet
  2018-01-09 13:26                           ` [dpdk-dev] [PATCH v8 0/5] " Xiao Wang
                                               ` (2 preceding siblings ...)
  2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 3/5] net: add a helper for making RARP packet Xiao Wang
@ 2018-01-09 13:26                             ` Xiao Wang
  2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 5/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-09 14:38                             ` [dpdk-dev] [PATCH v8 0/5] " Maxime Coquelin
  5 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 13:26 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, Xiao Wang

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 lib/Makefile                  |  3 ++-
 lib/librte_vhost/Makefile     |  2 +-
 lib/librte_vhost/virtio_net.c | 41 +----------------------------------------
 3 files changed, 4 insertions(+), 42 deletions(-)

diff --git a/lib/Makefile b/lib/Makefile
index 420270252..679912a28 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -31,7 +31,8 @@ DEPDIRS-librte_security += librte_cryptodev
 DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += librte_eventdev
 DEPDIRS-librte_eventdev := librte_eal librte_ring librte_ether librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
-DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether
+DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether \
+			librte_net
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
 DEPDIRS-librte_hash := librte_eal librte_ring
 DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 3b8cd27a3..065d5c469 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -17,7 +17,7 @@ LDLIBS += -lpthread
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
 LDLIBS += -lnuma
 endif
-LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 2227ced88..f58efc0e6 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -813,45 +813,6 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
 	}
 }
 
-#define RARP_PKT_SIZE	64
-
-static int
-make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
-{
-	struct ether_hdr *eth_hdr;
-	struct arp_hdr  *rarp;
-
-	if (rarp_mbuf->buf_len < 64) {
-		RTE_LOG(WARNING, VHOST_DATA,
-			"failed to make RARP; mbuf size too small %u (< %d)\n",
-			rarp_mbuf->buf_len, RARP_PKT_SIZE);
-		return -1;
-	}
-
-	/* Ethernet header. */
-	eth_hdr = rte_pktmbuf_mtod_offset(rarp_mbuf, struct ether_hdr *, 0);
-	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
-	ether_addr_copy(mac, &eth_hdr->s_addr);
-	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
-
-	/* RARP header. */
-	rarp = (struct arp_hdr *)(eth_hdr + 1);
-	rarp->arp_hrd = htons(ARP_HRD_ETHER);
-	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
-	rarp->arp_hln = ETHER_ADDR_LEN;
-	rarp->arp_pln = 4;
-	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
-
-	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
-	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
-	memset(&rarp->arp_data.arp_sip, 0x00, 4);
-	memset(&rarp->arp_data.arp_tip, 0x00, 4);
-
-	rarp_mbuf->pkt_len  = rarp_mbuf->data_len = RARP_PKT_SIZE;
-
-	return 0;
-}
-
 static __rte_always_inline void
 put_zmbuf(struct zcopy_mbuf *zmbuf)
 {
@@ -1214,7 +1175,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			return 0;
 		}
 
-		if (make_rarp_packet(rarp_mbuf, &dev->mac)) {
+		if (rte_net_make_rarp_packet(rarp_mbuf, &dev->mac) < 0) {
 			rte_pktmbuf_free(rarp_mbuf);
 			rarp_mbuf = NULL;
 		} else {
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v8 5/5] net/virtio: support GUEST ANNOUNCE
  2018-01-09 13:26                           ` [dpdk-dev] [PATCH v8 0/5] " Xiao Wang
                                               ` (3 preceding siblings ...)
  2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 4/5] vhost: use lib API to make RARP packet Xiao Wang
@ 2018-01-09 13:26                             ` Xiao Wang
  2018-01-09 14:38                             ` [dpdk-dev] [PATCH v8 0/5] " Maxime Coquelin
  5 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 13:26 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, Xiao Wang

When live migration is done, for the backup VM, either the virtio
frontend or the vhost backend needs to send out a gratuitous RARP packet
to announce its new network location.

This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
migration scenario where the vhost backend doesn't have the ability to
generate RARP packet.

Brief introduction of the work flow:
1. QEMU finishes live migration, pokes the backup VM with an interrupt.
2. Virtio interrupt handler reads out the interrupt status value, and
   realizes it needs to send out RARP packet to announce its location.
3. Pause device to stop worker thread touching the queues.
4. Inject a RARP packet into a Tx Queue.
5. Ack the interrupt via control queue.
6. Resume device to continue packet processing.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 55 ++++++++++++++++++++++++++++++++++++--
 drivers/net/virtio/virtio_ethdev.h |  1 +
 drivers/net/virtio/virtqueue.h     | 11 ++++++++
 3 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index e8ff1e449..3f616453f 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -19,6 +19,8 @@
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_arp.h>
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_cpuflags.h>
@@ -78,6 +80,9 @@ static int virtio_dev_queue_stats_mapping_set(
 	uint8_t stat_idx,
 	uint8_t is_rx);
 
+static void virtio_notify_peers(struct rte_eth_dev *dev);
+static void virtio_ack_link_announce(struct rte_eth_dev *dev);
+
 /*
  * The set of PCI devices this driver supports
  */
@@ -1272,9 +1277,51 @@ virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
 	return ret;
 }
 
+static void
+virtio_notify_peers(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
+	struct rte_mbuf *rarp_mbuf;
+
+	rarp_mbuf = rte_pktmbuf_alloc(rxvq->mpool);
+	if (rarp_mbuf == NULL) {
+		PMD_DRV_LOG(ERR, "mbuf allocate failed");
+		return;
+	}
+
+	if (rte_net_make_rarp_packet(rarp_mbuf,
+			(struct ether_addr *)hw->mac_addr) < 0) {
+		rte_pktmbuf_free(rarp_mbuf);
+		return;
+	}
+
+	/* If virtio port just stopped, no need to send RARP */
+	if (virtio_dev_pause(dev) < 0) {
+		rte_pktmbuf_free(rarp_mbuf);
+		return;
+	}
+
+	virtio_inject_pkts(dev, &rarp_mbuf, 1);
+	virtio_dev_resume(dev);
+}
+
+static void
+virtio_ack_link_announce(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
+
+	virtio_send_command(hw->cvq, &ctrl, NULL, 0);
+}
+
 /*
- * Process Virtio Config changed interrupt and call the callback
- * if link state changed.
+ * Process virtio config changed interrupt. Call the callback
+ * if link state changed, generate gratuitous RARP packet if
+ * the status indicates an ANNOUNCE.
  */
 void
 virtio_interrupt_handler(void *param)
@@ -1297,6 +1344,10 @@ virtio_interrupt_handler(void *param)
 						      NULL, NULL);
 	}
 
+	if (isr & VIRTIO_NET_S_ANNOUNCE) {
+		virtio_notify_peers(dev);
+		virtio_ack_link_announce(dev);
+	}
 }
 
 /* set rx and tx handlers according to what is supported */
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 69b30b7e1..09ebc5fb5 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -38,6 +38,7 @@
 	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
 	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
 	 1u << VIRTIO_NET_F_MTU	| \
+	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |	\
 	 1u << VIRTIO_RING_F_INDIRECT_DESC |    \
 	 1ULL << VIRTIO_F_VERSION_1       |	\
 	 1ULL << VIRTIO_F_IOMMU_PLATFORM)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 1482a951d..60df359b3 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -129,6 +129,17 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 #define VIRTIO_NET_CTRL_VLAN_DEL 1
 
+/*
+ * Control link announce acknowledgement
+ *
+ * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
+ * driver has received the notification; device would clear the
+ * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
+ * this command.
+ */
+#define VIRTIO_NET_CTRL_ANNOUNCE     3
+#define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
+
 struct virtio_net_ctrl_hdr {
 	uint8_t class;
 	uint8_t cmd;
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v7 3/3] net/virtio: support GUEST ANNOUNCE
  2018-01-09 11:41                               ` Thomas Monjalon
@ 2018-01-09 13:36                                 ` Yuanhan Liu
  0 siblings, 0 replies; 112+ messages in thread
From: Yuanhan Liu @ 2018-01-09 13:36 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: Wang, Xiao W, Maxime Coquelin, Bie, Tiwei, dev, stephen

On Tue, Jan 09, 2018 at 12:41:53PM +0100, Thomas Monjalon wrote:
> > > Do you think it could make sense to have this function in a lib, as
> > > vhost user lib does exactly the same?
> > > 
> > > I don't know if it could be useful to others than vhost/virtio though.
> > 
> > Hi Thomas,
> > 
> > Do you think it's worth adding a new helper for ARP in lib/librte_net/?
> > Currently we just need a helper to build RARP packet (the above make_rarp_packet)
> 
> Yes, good idea

+1

	--yliu

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v8 3/5] net: add a helper for making RARP packet
  2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 3/5] net: add a helper for making RARP packet Xiao Wang
@ 2018-01-09 13:48                               ` Thomas Monjalon
  2018-01-09 15:52                                 ` Wang, Xiao W
  2018-01-09 16:09                               ` [dpdk-dev] [PATCH v9 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  1 sibling, 1 reply; 112+ messages in thread
From: Thomas Monjalon @ 2018-01-09 13:48 UTC (permalink / raw)
  To: Xiao Wang; +Cc: yliu, tiwei.bie, dev, stephen

09/01/2018 14:26, Xiao Wang:
> +/**
> + * Make a RARP packet based on MAC addr.
> + *
> + * @param mbuf
> + *   Pointer to the rte_mbuf structure
> + * @param mac
> + *   Pointer to the MAC addr
> + *
> + * @return
> + *   - 0 on success, negative on error
> + */
> +int
> +rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac);

I think we should apply the new policy of introducing functions
with the experimental state.

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v7 0/3] net/virtio: support GUEST ANNOUNCE
  2018-01-07 12:05                     ` [dpdk-dev] [PATCH v6 2/3] net/virtio: add packet injection method Xiao Wang
  2018-01-08 13:03                       ` Yuanhan Liu
@ 2018-01-09 14:26                       ` Xiao Wang
  2018-01-09 14:26                         ` [dpdk-dev] [PATCH v7 1/3] net/virtio: make control queue thread-safe Xiao Wang
                                           ` (2 more replies)
  1 sibling, 3 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 14:26 UTC (permalink / raw)
  To: yliu; +Cc: tiwei.bie, dev, stephen, Xiao Wang

v7:
- Improve comment for state_lock.
- Rename spinlock variable 'sl' to 'lock'.

v6:
- Use rte_pktmbuf_alloc() instead of rte_mbuf_raw_alloc().
- Remove the 'len' parameter in calling virtio_send_command().
- Remove extra space between type and var.
- Improve comment and alignment.
- Remove the unnecessary header file.
- A better usage of 'unlikely' indication.

v5:
- Remove txvq parameter in virtio_inject_pkts.
- Zero hw->special_buf after using it.
- Return the retval of tx_pkt_burst().
- Allocate a mbuf pointer on stack directly.

v4:
- Move spinlock lock/unlock into dev_pause/resume.
- Separate out a patch for packet injection.

v3:
- Remove Tx function code duplication, use a special pointer for rarp
  injection.
- Rename function generate_rarp to virtio_notify_peers, replace
  'virtnet_' with 'virtio_'.
- Add comment for state_lock.
- Typo fix and comment improvement.

v2:
- Use spaces instead of tabs between the code and comments.
- Remove unnecessary parentheses.
- Use rte_pktmbuf_mtod directly to get eth_hdr addr.
- Fix virtio_dev_pause return value check.

Xiao Wang (3):
  net/virtio: make control queue thread-safe
  net/virtio: add packet injection method
  net/virtio: support GUEST ANNOUNCE

 drivers/net/virtio/virtio_ethdev.c      | 158 +++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h      |   6 ++
 drivers/net/virtio/virtio_pci.h         |   7 ++
 drivers/net/virtio/virtio_rxtx.c        |   3 +-
 drivers/net/virtio/virtio_rxtx.h        |   1 +
 drivers/net/virtio/virtio_rxtx_simple.c |   2 +-
 drivers/net/virtio/virtqueue.h          |  11 +++
 7 files changed, 183 insertions(+), 5 deletions(-)

-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v7 1/3] net/virtio: make control queue thread-safe
  2018-01-09 14:26                       ` [dpdk-dev] [PATCH v7 0/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2018-01-09 14:26                         ` Xiao Wang
  2018-01-09 14:26                         ` [dpdk-dev] [PATCH v7 2/3] net/virtio: add packet injection method Xiao Wang
  2018-01-09 14:26                         ` [dpdk-dev] [PATCH v7 3/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 14:26 UTC (permalink / raw)
  To: yliu; +Cc: tiwei.bie, dev, stephen, Xiao Wang

The virtio_send_command function may be called from app's configuration
routine, but also from an interrupt handler called when live migration is
done on the backup side. So this patch makes control queue thread-safe
first.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 7 ++++++-
 drivers/net/virtio/virtio_rxtx.c   | 1 +
 drivers/net/virtio/virtio_rxtx.h   | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 21f2131a9..4e613ce30 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -148,6 +148,8 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
 		return -1;
 	}
+
+	rte_spinlock_lock(&cvq->lock);
 	vq = cvq->vq;
 	head = vq->vq_desc_head_idx;
 
@@ -155,8 +157,10 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 		"vq->hw->cvq = %p vq = %p",
 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
 
-	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
+	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
+		rte_spinlock_unlock(&cvq->lock);
 		return -1;
+	}
 
 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
 		sizeof(struct virtio_pmd_ctrl));
@@ -232,6 +236,7 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 
 	result = cvq->virtio_net_hdr_mz->addr;
 
+	rte_spinlock_unlock(&cvq->lock);
 	return result->status;
 }
 
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 994458ced..265debf20 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -378,6 +378,7 @@ virtio_dev_cq_start(struct rte_eth_dev *dev)
 	struct virtio_hw *hw = dev->data->dev_private;
 
 	if (hw->cvq && hw->cvq->vq) {
+		rte_spinlock_init(&hw->cvq->lock);
 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
 	}
 }
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index ca546ccaa..49e9d98ee 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -55,6 +55,7 @@ struct virtnet_ctl {
 	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
 	uint16_t port_id;               /**< Device port identifier. */
 	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring. */
+	rte_spinlock_t lock;              /**< spinlock for control queue. */
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v7 2/3] net/virtio: add packet injection method
  2018-01-09 14:26                       ` [dpdk-dev] [PATCH v7 0/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-09 14:26                         ` [dpdk-dev] [PATCH v7 1/3] net/virtio: make control queue thread-safe Xiao Wang
@ 2018-01-09 14:26                         ` Xiao Wang
  2018-01-09 14:26                         ` [dpdk-dev] [PATCH v7 3/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 14:26 UTC (permalink / raw)
  To: yliu; +Cc: tiwei.bie, dev, stephen, Xiao Wang

This patch adds dev_pause, dev_resume and inject_pkts api to allow
driver to pause the worker threads and inject special packets into
Tx queue. The next patch will be based on this.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c      | 56 +++++++++++++++++++++++++++++++++
 drivers/net/virtio/virtio_ethdev.h      |  5 +++
 drivers/net/virtio/virtio_pci.h         |  7 +++++
 drivers/net/virtio/virtio_rxtx.c        |  2 +-
 drivers/net/virtio/virtio_rxtx_simple.c |  2 +-
 5 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 4e613ce30..e8ff1e449 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -26,6 +26,7 @@
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_dev.h>
+#include <rte_cycles.h>
 
 #include "virtio_ethdev.h"
 #include "virtio_pci.h"
@@ -1220,6 +1221,57 @@ virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
 	return 0;
 }
 
+int
+virtio_dev_pause(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	rte_spinlock_lock(&hw->state_lock);
+
+	if (hw->started == 0) {
+		/* Device is just stopped. */
+		rte_spinlock_unlock(&hw->state_lock);
+		return -1;
+	}
+	hw->started = 0;
+	/*
+	 * Prevent the worker threads from touching queues to avoid contention,
+	 * 1 ms should be enough for the ongoing Tx function to finish.
+	 */
+	rte_delay_ms(1);
+	return 0;
+}
+
+/*
+ * Recover hw state to let the worker threads continue.
+ */
+void
+virtio_dev_resume(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	hw->started = 1;
+	rte_spinlock_unlock(&hw->state_lock);
+}
+
+/*
+ * Should be called only after device is paused.
+ */
+int
+virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
+		int nb_pkts)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_tx *txvq = dev->data->tx_queues[0];
+	int ret;
+
+	hw->inject_pkts = tx_pkts;
+	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
+	hw->inject_pkts = NULL;
+
+	return ret;
+}
+
 /*
  * Process Virtio Config changed interrupt and call the callback
  * if link state changed.
@@ -1757,6 +1809,8 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 			return -EBUSY;
 		}
 
+	rte_spinlock_init(&hw->state_lock);
+
 	hw->use_simple_rx = 1;
 	hw->use_simple_tx = 1;
 
@@ -1923,12 +1977,14 @@ virtio_dev_stop(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
+	rte_spinlock_lock(&hw->state_lock);
 	if (intr_conf->lsc || intr_conf->rxq)
 		virtio_intr_disable(dev);
 
 	hw->started = 0;
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
+	rte_spinlock_unlock(&hw->state_lock);
 }
 
 static int
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 765d249e6..69b30b7e1 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -92,4 +92,9 @@ int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
 
 void virtio_interrupt_handler(void *param);
 
+int virtio_dev_pause(struct rte_eth_dev *dev);
+void virtio_dev_resume(struct rte_eth_dev *dev);
+int virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
+		int nb_pkts);
+
 #endif /* _VIRTIO_ETHDEV_H_ */
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index fb1f6a9ec..9d810a599 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -241,6 +241,13 @@ struct virtio_hw {
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	void	    *virtio_user_dev;
+	/*
+	 * App management thread and virtio interrupt handler thread
+	 * both can change device state, this lock is meant to avoid
+	 * such a contention.
+	 */
+	rte_spinlock_t state_lock;
+	struct rte_mbuf **inject_pkts;
 
 	struct virtqueue **vqs;
 };
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 265debf20..80e996d06 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -988,7 +988,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	uint16_t nb_used, nb_tx = 0;
 	int error;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
 		return nb_tx;
 
 	if (unlikely(nb_pkts < 1))
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index 8ef3c0c04..98a9da5d8 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -70,7 +70,7 @@ virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1;
 	uint16_t nb_tx = 0;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
 		return nb_tx;
 
 	nb_used = VIRTQUEUE_NUSED(vq);
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v7 3/3] net/virtio: support GUEST ANNOUNCE
  2018-01-09 14:26                       ` [dpdk-dev] [PATCH v7 0/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-09 14:26                         ` [dpdk-dev] [PATCH v7 1/3] net/virtio: make control queue thread-safe Xiao Wang
  2018-01-09 14:26                         ` [dpdk-dev] [PATCH v7 2/3] net/virtio: add packet injection method Xiao Wang
@ 2018-01-09 14:26                         ` Xiao Wang
  2018-01-09  8:49                           ` Maxime Coquelin
  2018-01-09 13:26                           ` [dpdk-dev] [PATCH v8 0/5] " Xiao Wang
  2 siblings, 2 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 14:26 UTC (permalink / raw)
  To: yliu; +Cc: tiwei.bie, dev, stephen, Xiao Wang

When live migration is done, for the backup VM, either the virtio
frontend or the vhost backend needs to send out a gratuitous RARP packet
to announce its new network location.

This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
migration scenario where the vhost backend doesn't have the ability to
generate RARP packet.

Brief introduction of the work flow:
1. QEMU finishes live migration, pokes the backup VM with an interrupt.
2. Virtio interrupt handler reads out the interrupt status value, and
   realizes it needs to send out RARP packet to announce its location.
3. Pause device to stop worker thread touching the queues.
4. Inject a RARP packet into a Tx Queue.
5. Ack the interrupt via control queue.
6. Resume device to continue packet processing.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 95 +++++++++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h |  1 +
 drivers/net/virtio/virtqueue.h     | 11 +++++
 3 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index e8ff1e449..9606df514 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -19,6 +19,8 @@
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_arp.h>
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_cpuflags.h>
@@ -78,6 +80,11 @@ static int virtio_dev_queue_stats_mapping_set(
 	uint8_t stat_idx,
 	uint8_t is_rx);
 
+static int make_rarp_packet(struct rte_mbuf *rarp_mbuf,
+		const struct ether_addr *mac);
+static void virtio_notify_peers(struct rte_eth_dev *dev);
+static void virtio_ack_link_announce(struct rte_eth_dev *dev);
+
 /*
  * The set of PCI devices this driver supports
  */
@@ -1272,9 +1279,89 @@ virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
 	return ret;
 }
 
+#define RARP_PKT_SIZE	64
+static int
+make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
+{
+	struct ether_hdr *eth_hdr;
+	struct arp_hdr *rarp;
+
+	if (rarp_mbuf->buf_len < RARP_PKT_SIZE) {
+		PMD_DRV_LOG(ERR, "mbuf size too small %u (< %d)",
+				rarp_mbuf->buf_len, RARP_PKT_SIZE);
+		return -1;
+	}
+
+	/* Ethernet header. */
+	eth_hdr = rte_pktmbuf_mtod(rarp_mbuf, struct ether_hdr *);
+	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
+	ether_addr_copy(mac, &eth_hdr->s_addr);
+	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
+
+	/* RARP header. */
+	rarp = (struct arp_hdr *)(eth_hdr + 1);
+	rarp->arp_hrd = htons(ARP_HRD_ETHER);
+	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
+	rarp->arp_hln = ETHER_ADDR_LEN;
+	rarp->arp_pln = 4;
+	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
+
+	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
+	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
+	memset(&rarp->arp_data.arp_sip, 0x00, 4);
+	memset(&rarp->arp_data.arp_tip, 0x00, 4);
+
+	rarp_mbuf->data_len = RARP_PKT_SIZE;
+	rarp_mbuf->pkt_len = RARP_PKT_SIZE;
+
+	return 0;
+}
+
+static void
+virtio_notify_peers(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
+	struct rte_mbuf *rarp_mbuf;
+
+	rarp_mbuf = rte_pktmbuf_alloc(rxvq->mpool);
+	if (rarp_mbuf == NULL) {
+		PMD_DRV_LOG(ERR, "mbuf allocate failed");
+		return;
+	}
+
+	if (make_rarp_packet(rarp_mbuf,
+			(struct ether_addr *)hw->mac_addr) < 0) {
+		rte_pktmbuf_free(rarp_mbuf);
+		return;
+	}
+
+	/* If virtio port just stopped, no need to send RARP */
+	if (virtio_dev_pause(dev) < 0) {
+		rte_pktmbuf_free(rarp_mbuf);
+		return;
+	}
+
+	virtio_inject_pkts(dev, &rarp_mbuf, 1);
+	virtio_dev_resume(dev);
+}
+
+static void
+virtio_ack_link_announce(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
+
+	virtio_send_command(hw->cvq, &ctrl, NULL, 0);
+}
+
 /*
- * Process Virtio Config changed interrupt and call the callback
- * if link state changed.
+ * Process virtio config changed interrupt. Call the callback
+ * if link state changed, generate gratuitous RARP packet if
+ * the status indicates an ANNOUNCE.
  */
 void
 virtio_interrupt_handler(void *param)
@@ -1297,6 +1384,10 @@ virtio_interrupt_handler(void *param)
 						      NULL, NULL);
 	}
 
+	if (isr & VIRTIO_NET_S_ANNOUNCE) {
+		virtio_notify_peers(dev);
+		virtio_ack_link_announce(dev);
+	}
 }
 
 /* set rx and tx handlers according to what is supported */
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 69b30b7e1..09ebc5fb5 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -38,6 +38,7 @@
 	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
 	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
 	 1u << VIRTIO_NET_F_MTU	| \
+	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |	\
 	 1u << VIRTIO_RING_F_INDIRECT_DESC |    \
 	 1ULL << VIRTIO_F_VERSION_1       |	\
 	 1ULL << VIRTIO_F_IOMMU_PLATFORM)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 1482a951d..60df359b3 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -129,6 +129,17 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 #define VIRTIO_NET_CTRL_VLAN_DEL 1
 
+/*
+ * Control link announce acknowledgement
+ *
+ * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
+ * driver has received the notification; device would clear the
+ * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
+ * this command.
+ */
+#define VIRTIO_NET_CTRL_ANNOUNCE     3
+#define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
+
 struct virtio_net_ctrl_hdr {
 	uint8_t class;
 	uint8_t cmd;
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v8 0/5] net/virtio: support GUEST ANNOUNCE
  2018-01-09 13:26                           ` [dpdk-dev] [PATCH v8 0/5] " Xiao Wang
                                               ` (4 preceding siblings ...)
  2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 5/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2018-01-09 14:38                             ` Maxime Coquelin
  5 siblings, 0 replies; 112+ messages in thread
From: Maxime Coquelin @ 2018-01-09 14:38 UTC (permalink / raw)
  To: Xiao Wang, yliu, thomas; +Cc: tiwei.bie, dev, stephen



On 01/09/2018 02:26 PM, Xiao Wang wrote:
> v8:
> - Add a helper in lib/librte_net to make rarp packet, it's used by
>    both vhost and virtio.
> 
> v7:
> - Improve comment for state_lock.
> - Rename spinlock variable 'sl' to 'lock'.
> 
> v6:
> - Use rte_pktmbuf_alloc() instead of rte_mbuf_raw_alloc().
> - Remove the 'len' parameter in calling virtio_send_command().
> - Remove extra space between type and var.
> - Improve comment and alignment.
> - Remove the unnecessary header file.
> - A better usage of 'unlikely' indication.
> 
> v5:
> - Remove txvq parameter in virtio_inject_pkts.
> - Zero hw->special_buf after using it.
> - Return the retval of tx_pkt_burst().
> - Allocate a mbuf pointer on stack directly.
> 
> v4:
> - Move spinlock lock/unlock into dev_pause/resume.
> - Separate out a patch for packet injection.
> 
> v3:
> - Remove Tx function code duplication, use a special pointer for rarp
>    injection.
> - Rename function generate_rarp to virtio_notify_peers, replace
>    'virtnet_' with 'virtio_'.
> - Add comment for state_lock.
> - Typo fix and comment improvement.
> 
> v2:
> - Use spaces instead of tabs between the code and comments.
> - Remove unnecessary parentheses.
> - Use rte_pktmbuf_mtod directly to get eth_hdr addr.
> - Fix virtio_dev_pause return value check.
> 
> Xiao Wang (5):
>    net/virtio: make control queue thread-safe
>    net/virtio: add packet injection method
>    net: add a helper for making RARP packet
Thanks for handling the change!

>    vhost: use lib API to make RARP packet
>    net/virtio: support GUEST ANNOUNCE
> 
>   drivers/net/virtio/virtio_ethdev.c      | 118 +++++++++++++++++++++++++++++++-
>   drivers/net/virtio/virtio_ethdev.h      |   6 ++
>   drivers/net/virtio/virtio_pci.h         |   7 ++
>   drivers/net/virtio/virtio_rxtx.c        |   3 +-
>   drivers/net/virtio/virtio_rxtx.h        |   1 +
>   drivers/net/virtio/virtio_rxtx_simple.c |   2 +-
>   drivers/net/virtio/virtqueue.h          |  11 +++
>   lib/Makefile                            |   3 +-
>   lib/librte_net/Makefile                 |   1 +
>   lib/librte_net/rte_arp.c                |  42 ++++++++++++
>   lib/librte_net/rte_arp.h                |  14 ++++
>   lib/librte_net/rte_net_version.map      |   6 ++
>   lib/librte_vhost/Makefile               |   2 +-
>   lib/librte_vhost/virtio_net.c           |  41 +----------
>   14 files changed, 210 insertions(+), 47 deletions(-)
>   create mode 100644 lib/librte_net/rte_arp.c
> 

For the series:
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Maxime

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v8 3/5] net: add a helper for making RARP packet
  2018-01-09 13:48                               ` Thomas Monjalon
@ 2018-01-09 15:52                                 ` Wang, Xiao W
  0 siblings, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-09 15:52 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: yliu, Bie, Tiwei, dev, stephen

Hi,

> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Tuesday, January 9, 2018 9:49 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: yliu@fridaylinux.org; Bie, Tiwei <tiwei.bie@intel.com>; dev@dpdk.org;
> stephen@networkplumber.org
> Subject: Re: [PATCH v8 3/5] net: add a helper for making RARP packet
> 
> 09/01/2018 14:26, Xiao Wang:
> > +/**
> > + * Make a RARP packet based on MAC addr.
> > + *
> > + * @param mbuf
> > + *   Pointer to the rte_mbuf structure
> > + * @param mac
> > + *   Pointer to the MAC addr
> > + *
> > + * @return
> > + *   - 0 on success, negative on error
> > + */
> > +int
> > +rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr
> *mac);
> 
> I think we should apply the new policy of introducing functions
> with the experimental state.

OK, will change it soon.

Thanks,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v9 0/5] net/virtio: support GUEST ANNOUNCE
  2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 3/5] net: add a helper for making RARP packet Xiao Wang
  2018-01-09 13:48                               ` Thomas Monjalon
@ 2018-01-09 16:09                               ` Xiao Wang
  2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 1/5] net/virtio: make control queue thread-safe Xiao Wang
                                                   ` (4 more replies)
  1 sibling, 5 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 16:09 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, maxime.coquelin, Xiao Wang

v9:
- Introduce function with the experimental state.

v8:
- Add a helper in lib/librte_net to make rarp packet, it's used by
  both vhost and virtio.

v7:
- Improve comment for state_lock.
- Rename spinlock variable 'sl' to 'lock'.

v6:
- Use rte_pktmbuf_alloc() instead of rte_mbuf_raw_alloc().
- Remove the 'len' parameter in calling virtio_send_command().
- Remove extra space between type and var.
- Improve comment and alignment.
- Remove the unnecessary header file.
- A better usage of 'unlikely' indication.

v5:
- Remove txvq parameter in virtio_inject_pkts.
- Zero hw->special_buf after using it.
- Return the retval of tx_pkt_burst().
- Allocate a mbuf pointer on stack directly.

v4:
- Move spinlock lock/unlock into dev_pause/resume.
- Separate out a patch for packet injection.

v3:
- Remove Tx function code duplication, use a special pointer for rarp
  injection.
- Rename function generate_rarp to virtio_notify_peers, replace
  'virtnet_' with 'virtio_'.
- Add comment for state_lock.
- Typo fix and comment improvement.

v2:
- Use spaces instead of tabs between the code and comments.
- Remove unnecessary parentheses.
- Use rte_pktmbuf_mtod directly to get eth_hdr addr.
- Fix virtio_dev_pause return value check.

Xiao Wang (5):
  net/virtio: make control queue thread-safe
  net/virtio: add packet injection method
  net: add a helper for making RARP packet
  vhost: use lib API to make RARP packet
  net/virtio: support GUEST ANNOUNCE

 drivers/net/virtio/virtio_ethdev.c      | 118 +++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h      |   6 ++
 drivers/net/virtio/virtio_pci.h         |   7 ++
 drivers/net/virtio/virtio_rxtx.c        |   3 +-
 drivers/net/virtio/virtio_rxtx.h        |   1 +
 drivers/net/virtio/virtio_rxtx_simple.c |   2 +-
 drivers/net/virtio/virtqueue.h          |  11 +++
 lib/Makefile                            |   3 +-
 lib/librte_net/Makefile                 |   1 +
 lib/librte_net/rte_arp.c                |  42 ++++++++++++
 lib/librte_net/rte_arp.h                |  14 ++++
 lib/librte_net/rte_net_version.map      |   6 ++
 lib/librte_vhost/Makefile               |   2 +-
 lib/librte_vhost/virtio_net.c           |  41 +----------
 14 files changed, 210 insertions(+), 47 deletions(-)
 create mode 100644 lib/librte_net/rte_arp.c

-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v9 1/5] net/virtio: make control queue thread-safe
  2018-01-09 16:09                               ` [dpdk-dev] [PATCH v9 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2018-01-09 16:09                                 ` Xiao Wang
  2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 2/5] net/virtio: add packet injection method Xiao Wang
                                                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 16:09 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, maxime.coquelin, Xiao Wang

The virtio_send_command function may be called from app's configuration
routine, but also from an interrupt handler called when live migration is
done on the backup side. So this patch makes control queue thread-safe
first.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/net/virtio/virtio_ethdev.c | 7 ++++++-
 drivers/net/virtio/virtio_rxtx.c   | 1 +
 drivers/net/virtio/virtio_rxtx.h   | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 21f2131a9..4e613ce30 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -148,6 +148,8 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
 		return -1;
 	}
+
+	rte_spinlock_lock(&cvq->lock);
 	vq = cvq->vq;
 	head = vq->vq_desc_head_idx;
 
@@ -155,8 +157,10 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 		"vq->hw->cvq = %p vq = %p",
 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
 
-	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
+	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
+		rte_spinlock_unlock(&cvq->lock);
 		return -1;
+	}
 
 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
 		sizeof(struct virtio_pmd_ctrl));
@@ -232,6 +236,7 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 
 	result = cvq->virtio_net_hdr_mz->addr;
 
+	rte_spinlock_unlock(&cvq->lock);
 	return result->status;
 }
 
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 994458ced..265debf20 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -378,6 +378,7 @@ virtio_dev_cq_start(struct rte_eth_dev *dev)
 	struct virtio_hw *hw = dev->data->dev_private;
 
 	if (hw->cvq && hw->cvq->vq) {
+		rte_spinlock_init(&hw->cvq->lock);
 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
 	}
 }
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index ca546ccaa..49e9d98ee 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -55,6 +55,7 @@ struct virtnet_ctl {
 	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
 	uint16_t port_id;               /**< Device port identifier. */
 	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring. */
+	rte_spinlock_t lock;              /**< spinlock for control queue. */
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v9 2/5] net/virtio: add packet injection method
  2018-01-09 16:09                               ` [dpdk-dev] [PATCH v9 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 1/5] net/virtio: make control queue thread-safe Xiao Wang
@ 2018-01-09 16:09                                 ` Xiao Wang
  2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 3/5] net: add a helper for making RARP packet Xiao Wang
                                                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 16:09 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, maxime.coquelin, Xiao Wang

This patch adds dev_pause, dev_resume and inject_pkts APIs to allow
driver to pause the worker threads and inject special packets into
Tx queue. The next patch will be based on this.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/net/virtio/virtio_ethdev.c      | 56 +++++++++++++++++++++++++++++++++
 drivers/net/virtio/virtio_ethdev.h      |  5 +++
 drivers/net/virtio/virtio_pci.h         |  7 +++++
 drivers/net/virtio/virtio_rxtx.c        |  2 +-
 drivers/net/virtio/virtio_rxtx_simple.c |  2 +-
 5 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 4e613ce30..e8ff1e449 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -26,6 +26,7 @@
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_dev.h>
+#include <rte_cycles.h>
 
 #include "virtio_ethdev.h"
 #include "virtio_pci.h"
@@ -1220,6 +1221,57 @@ virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
 	return 0;
 }
 
+int
+virtio_dev_pause(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	rte_spinlock_lock(&hw->state_lock);
+
+	if (hw->started == 0) {
+		/* Device is just stopped. */
+		rte_spinlock_unlock(&hw->state_lock);
+		return -1;
+	}
+	hw->started = 0;
+	/*
+	 * Prevent the worker threads from touching queues to avoid contention,
+	 * 1 ms should be enough for the ongoing Tx function to finish.
+	 */
+	rte_delay_ms(1);
+	return 0;
+}
+
+/*
+ * Recover hw state to let the worker threads continue.
+ */
+void
+virtio_dev_resume(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	hw->started = 1;
+	rte_spinlock_unlock(&hw->state_lock);
+}
+
+/*
+ * Should be called only after device is paused.
+ */
+int
+virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
+		int nb_pkts)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_tx *txvq = dev->data->tx_queues[0];
+	int ret;
+
+	hw->inject_pkts = tx_pkts;
+	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
+	hw->inject_pkts = NULL;
+
+	return ret;
+}
+
 /*
  * Process Virtio Config changed interrupt and call the callback
  * if link state changed.
@@ -1757,6 +1809,8 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 			return -EBUSY;
 		}
 
+	rte_spinlock_init(&hw->state_lock);
+
 	hw->use_simple_rx = 1;
 	hw->use_simple_tx = 1;
 
@@ -1923,12 +1977,14 @@ virtio_dev_stop(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
+	rte_spinlock_lock(&hw->state_lock);
 	if (intr_conf->lsc || intr_conf->rxq)
 		virtio_intr_disable(dev);
 
 	hw->started = 0;
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
+	rte_spinlock_unlock(&hw->state_lock);
 }
 
 static int
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 765d249e6..69b30b7e1 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -92,4 +92,9 @@ int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
 
 void virtio_interrupt_handler(void *param);
 
+int virtio_dev_pause(struct rte_eth_dev *dev);
+void virtio_dev_resume(struct rte_eth_dev *dev);
+int virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
+		int nb_pkts);
+
 #endif /* _VIRTIO_ETHDEV_H_ */
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index fb1f6a9ec..9d810a599 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -241,6 +241,13 @@ struct virtio_hw {
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	void	    *virtio_user_dev;
+	/*
+	 * App management thread and virtio interrupt handler thread
+	 * both can change device state, this lock is meant to avoid
+	 * such a contention.
+	 */
+	rte_spinlock_t state_lock;
+	struct rte_mbuf **inject_pkts;
 
 	struct virtqueue **vqs;
 };
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 265debf20..80e996d06 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -988,7 +988,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	uint16_t nb_used, nb_tx = 0;
 	int error;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
 		return nb_tx;
 
 	if (unlikely(nb_pkts < 1))
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index 8ef3c0c04..98a9da5d8 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -70,7 +70,7 @@ virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1;
 	uint16_t nb_tx = 0;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
 		return nb_tx;
 
 	nb_used = VIRTQUEUE_NUSED(vq);
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v9 3/5] net: add a helper for making RARP packet
  2018-01-09 16:09                               ` [dpdk-dev] [PATCH v9 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 1/5] net/virtio: make control queue thread-safe Xiao Wang
  2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 2/5] net/virtio: add packet injection method Xiao Wang
@ 2018-01-09 16:09                                 ` Xiao Wang
  2018-01-09 17:22                                   ` Thomas Monjalon
  2018-01-10  1:23                                   ` [dpdk-dev] [PATCH v10 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 4/5] vhost: use lib API to make RARP packet Xiao Wang
  2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 5/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  4 siblings, 2 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 16:09 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, maxime.coquelin, Xiao Wang

Suggested-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_net/Makefile            |  1 +
 lib/librte_net/rte_arp.c           | 42 ++++++++++++++++++++++++++++++++++++++
 lib/librte_net/rte_arp.h           | 14 +++++++++++++
 lib/librte_net/rte_net_version.map |  6 ++++++
 4 files changed, 63 insertions(+)
 create mode 100644 lib/librte_net/rte_arp.c

diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index 5e8a76b68..ab290c382 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -13,6 +13,7 @@ LIBABIVER := 1
 
 SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
 SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
+SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_arp.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_esp.h
diff --git a/lib/librte_net/rte_arp.c b/lib/librte_net/rte_arp.c
new file mode 100644
index 000000000..d7223b044
--- /dev/null
+++ b/lib/librte_net/rte_arp.c
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <arpa/inet.h>
+
+#include <rte_arp.h>
+
+#define RARP_PKT_SIZE	64
+int
+rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac)
+{
+	struct ether_hdr *eth_hdr;
+	struct arp_hdr *rarp;
+
+	if (mbuf->buf_len < RARP_PKT_SIZE)
+		return -1;
+
+	/* Ethernet header. */
+	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
+	ether_addr_copy(mac, &eth_hdr->s_addr);
+	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
+
+	/* RARP header. */
+	rarp = (struct arp_hdr *)(eth_hdr + 1);
+	rarp->arp_hrd = htons(ARP_HRD_ETHER);
+	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
+	rarp->arp_hln = ETHER_ADDR_LEN;
+	rarp->arp_pln = 4;
+	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
+
+	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
+	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
+	memset(&rarp->arp_data.arp_sip, 0x00, 4);
+	memset(&rarp->arp_data.arp_tip, 0x00, 4);
+
+	mbuf->data_len = RARP_PKT_SIZE;
+	mbuf->pkt_len = RARP_PKT_SIZE;
+
+	return 0;
+}
diff --git a/lib/librte_net/rte_arp.h b/lib/librte_net/rte_arp.h
index 183641874..375635967 100644
--- a/lib/librte_net/rte_arp.h
+++ b/lib/librte_net/rte_arp.h
@@ -76,6 +76,20 @@ struct arp_hdr {
 	struct arp_ipv4 arp_data;
 } __attribute__((__packed__));
 
+/**
+ * Make a RARP packet based on MAC addr.
+ *
+ * @param mbuf
+ *   Pointer to the rte_mbuf structure
+ * @param mac
+ *   Pointer to the MAC addr
+ *
+ * @return
+ *   - 0 on success, negative on error
+ */
+int
+rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map
index 687c40eaf..213e6fd32 100644
--- a/lib/librte_net/rte_net_version.map
+++ b/lib/librte_net/rte_net_version.map
@@ -12,3 +12,9 @@ DPDK_17.05 {
 	rte_net_crc_set_alg;
 
 } DPDK_16.11;
+
+EXPERIMENTAL {
+	global:
+
+	rte_net_make_rarp_packet;
+} DPDK_17.05;
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v9 4/5] vhost: use lib API to make RARP packet
  2018-01-09 16:09                               ` [dpdk-dev] [PATCH v9 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
                                                   ` (2 preceding siblings ...)
  2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 3/5] net: add a helper for making RARP packet Xiao Wang
@ 2018-01-09 16:09                                 ` Xiao Wang
  2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 5/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  4 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 16:09 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, maxime.coquelin, Xiao Wang

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/Makefile                  |  3 ++-
 lib/librte_vhost/Makefile     |  2 +-
 lib/librte_vhost/virtio_net.c | 41 +----------------------------------------
 3 files changed, 4 insertions(+), 42 deletions(-)

diff --git a/lib/Makefile b/lib/Makefile
index 420270252..679912a28 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -31,7 +31,8 @@ DEPDIRS-librte_security += librte_cryptodev
 DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += librte_eventdev
 DEPDIRS-librte_eventdev := librte_eal librte_ring librte_ether librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
-DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether
+DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether \
+			librte_net
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
 DEPDIRS-librte_hash := librte_eal librte_ring
 DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 3b8cd27a3..065d5c469 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -17,7 +17,7 @@ LDLIBS += -lpthread
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
 LDLIBS += -lnuma
 endif
-LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 2227ced88..f58efc0e6 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -813,45 +813,6 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
 	}
 }
 
-#define RARP_PKT_SIZE	64
-
-static int
-make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
-{
-	struct ether_hdr *eth_hdr;
-	struct arp_hdr  *rarp;
-
-	if (rarp_mbuf->buf_len < 64) {
-		RTE_LOG(WARNING, VHOST_DATA,
-			"failed to make RARP; mbuf size too small %u (< %d)\n",
-			rarp_mbuf->buf_len, RARP_PKT_SIZE);
-		return -1;
-	}
-
-	/* Ethernet header. */
-	eth_hdr = rte_pktmbuf_mtod_offset(rarp_mbuf, struct ether_hdr *, 0);
-	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
-	ether_addr_copy(mac, &eth_hdr->s_addr);
-	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
-
-	/* RARP header. */
-	rarp = (struct arp_hdr *)(eth_hdr + 1);
-	rarp->arp_hrd = htons(ARP_HRD_ETHER);
-	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
-	rarp->arp_hln = ETHER_ADDR_LEN;
-	rarp->arp_pln = 4;
-	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
-
-	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
-	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
-	memset(&rarp->arp_data.arp_sip, 0x00, 4);
-	memset(&rarp->arp_data.arp_tip, 0x00, 4);
-
-	rarp_mbuf->pkt_len  = rarp_mbuf->data_len = RARP_PKT_SIZE;
-
-	return 0;
-}
-
 static __rte_always_inline void
 put_zmbuf(struct zcopy_mbuf *zmbuf)
 {
@@ -1214,7 +1175,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			return 0;
 		}
 
-		if (make_rarp_packet(rarp_mbuf, &dev->mac)) {
+		if (rte_net_make_rarp_packet(rarp_mbuf, &dev->mac) < 0) {
 			rte_pktmbuf_free(rarp_mbuf);
 			rarp_mbuf = NULL;
 		} else {
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v9 5/5] net/virtio: support GUEST ANNOUNCE
  2018-01-09 16:09                               ` [dpdk-dev] [PATCH v9 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
                                                   ` (3 preceding siblings ...)
  2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 4/5] vhost: use lib API to make RARP packet Xiao Wang
@ 2018-01-09 16:09                                 ` Xiao Wang
  4 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-09 16:09 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, maxime.coquelin, Xiao Wang

When live migration is done, for the backup VM, either the virtio
frontend or the vhost backend needs to send out gratuitous RARP packet
to announce its new network location.

This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
migration scenario where the vhost backend doesn't have the ability to
generate RARP packet.

Brief introduction of the work flow:
1. QEMU finishes live migration, pokes the backup VM with an interrupt.
2. Virtio interrupt handler reads out the interrupt status value, and
   realizes it needs to send out RARP packet to announce its location.
3. Pause device to stop worker thread touching the queues.
4. Inject a RARP packet into a Tx Queue.
5. Ack the interrupt via control queue.
6. Resume device to continue packet processing.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/net/virtio/virtio_ethdev.c | 55 ++++++++++++++++++++++++++++++++++++--
 drivers/net/virtio/virtio_ethdev.h |  1 +
 drivers/net/virtio/virtqueue.h     | 11 ++++++++
 3 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index e8ff1e449..3f616453f 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -19,6 +19,8 @@
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_arp.h>
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_cpuflags.h>
@@ -78,6 +80,9 @@ static int virtio_dev_queue_stats_mapping_set(
 	uint8_t stat_idx,
 	uint8_t is_rx);
 
+static void virtio_notify_peers(struct rte_eth_dev *dev);
+static void virtio_ack_link_announce(struct rte_eth_dev *dev);
+
 /*
  * The set of PCI devices this driver supports
  */
@@ -1272,9 +1277,51 @@ virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
 	return ret;
 }
 
+static void
+virtio_notify_peers(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
+	struct rte_mbuf *rarp_mbuf;
+
+	rarp_mbuf = rte_pktmbuf_alloc(rxvq->mpool);
+	if (rarp_mbuf == NULL) {
+		PMD_DRV_LOG(ERR, "mbuf allocate failed");
+		return;
+	}
+
+	if (rte_net_make_rarp_packet(rarp_mbuf,
+			(struct ether_addr *)hw->mac_addr) < 0) {
+		rte_pktmbuf_free(rarp_mbuf);
+		return;
+	}
+
+	/* If virtio port just stopped, no need to send RARP */
+	if (virtio_dev_pause(dev) < 0) {
+		rte_pktmbuf_free(rarp_mbuf);
+		return;
+	}
+
+	virtio_inject_pkts(dev, &rarp_mbuf, 1);
+	virtio_dev_resume(dev);
+}
+
+static void
+virtio_ack_link_announce(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
+
+	virtio_send_command(hw->cvq, &ctrl, NULL, 0);
+}
+
 /*
- * Process Virtio Config changed interrupt and call the callback
- * if link state changed.
+ * Process virtio config changed interrupt. Call the callback
+ * if link state changed, generate gratuitous RARP packet if
+ * the status indicates an ANNOUNCE.
  */
 void
 virtio_interrupt_handler(void *param)
@@ -1297,6 +1344,10 @@ virtio_interrupt_handler(void *param)
 						      NULL, NULL);
 	}
 
+	if (isr & VIRTIO_NET_S_ANNOUNCE) {
+		virtio_notify_peers(dev);
+		virtio_ack_link_announce(dev);
+	}
 }
 
 /* set rx and tx handlers according to what is supported */
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 69b30b7e1..09ebc5fb5 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -38,6 +38,7 @@
 	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
 	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
 	 1u << VIRTIO_NET_F_MTU	| \
+	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |	\
 	 1u << VIRTIO_RING_F_INDIRECT_DESC |    \
 	 1ULL << VIRTIO_F_VERSION_1       |	\
 	 1ULL << VIRTIO_F_IOMMU_PLATFORM)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 1482a951d..60df359b3 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -129,6 +129,17 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 #define VIRTIO_NET_CTRL_VLAN_DEL 1
 
+/*
+ * Control link announce acknowledgement
+ *
+ * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
+ * driver has received the notification; device would clear the
+ * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
+ * this command.
+ */
+#define VIRTIO_NET_CTRL_ANNOUNCE     3
+#define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
+
 struct virtio_net_ctrl_hdr {
 	uint8_t class;
 	uint8_t cmd;
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v9 3/5] net: add a helper for making RARP packet
  2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 3/5] net: add a helper for making RARP packet Xiao Wang
@ 2018-01-09 17:22                                   ` Thomas Monjalon
  2018-01-10  1:23                                   ` [dpdk-dev] [PATCH v10 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  1 sibling, 0 replies; 112+ messages in thread
From: Thomas Monjalon @ 2018-01-09 17:22 UTC (permalink / raw)
  To: Xiao Wang; +Cc: yliu, tiwei.bie, dev, stephen, maxime.coquelin

09/01/2018 17:09, Xiao Wang:
> --- a/lib/librte_net/rte_arp.h
> +++ b/lib/librte_net/rte_arp.h
> @@ -76,6 +76,20 @@ struct arp_hdr {
>  	struct arp_ipv4 arp_data;
>  } __attribute__((__packed__));
>  
> +/**
> + * Make a RARP packet based on MAC addr.
> + *
> + * @param mbuf
> + *   Pointer to the rte_mbuf structure
> + * @param mac
> + *   Pointer to the MAC addr
> + *
> + * @return
> + *   - 0 on success, negative on error
> + */
> +int
> +rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac);

Please check how experimental state is advertised for other functions.
Usually we add a bold doxygen comment.

[...]
> --- a/lib/librte_net/rte_net_version.map
> +++ b/lib/librte_net/rte_net_version.map
> +EXPERIMENTAL {
> +	global:
> +
> +	rte_net_make_rarp_packet;
> +} DPDK_17.05;
> 

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v10 0/5] net/virtio: support GUEST ANNOUNCE
  2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 3/5] net: add a helper for making RARP packet Xiao Wang
  2018-01-09 17:22                                   ` Thomas Monjalon
@ 2018-01-10  1:23                                   ` Xiao Wang
  2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 1/5] net/virtio: make control queue thread-safe Xiao Wang
                                                       ` (4 more replies)
  1 sibling, 5 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-10  1:23 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, maxime.coquelin, Xiao Wang

When live migration is finished, the backup VM needs to proactively announce
its new location. DPDK vhost has implemented VHOST_USER_PROTOCOL_F_RARP to
generate a RARP packet to switch in dequeue path. Another method is to let
the guest proactively send out RARP packet using VIRTIO_NET_F_GUEST_ANNOUNCE
feature.

This patch set enables this feature in virtio pmd, so that a VM running virtio
pmd can be migrated without vhost supporting RARP generation.

v10:
- Add a bold doxygen comment for the experimental function.

v9:
- Introduce function with the experimental state.

v8:
- Add a helper in lib/librte_net to make rarp packet, it's used by
  both vhost and virtio.

v7:
- Improve comment for state_lock.
- Rename spinlock variable 'sl' to 'lock'.

v6:
- Use rte_pktmbuf_alloc() instead of rte_mbuf_raw_alloc().
- Remove the 'len' parameter in calling virtio_send_command().
- Remove extra space between type and var.
- Improve comment and alignment.
- Remove the unnecessary header file.
- A better usage of 'unlikely' indication.

v5:
- Remove txvq parameter in virtio_inject_pkts.
- Zero hw->special_buf after using it.
- Return the retval of tx_pkt_burst().
- Allocate a mbuf pointer on stack directly.

v4:
- Move spinlock lock/unlock into dev_pause/resume.
- Separate out a patch for packet injection.

v3:
- Remove Tx function code duplication, use a special pointer for rarp
  injection.
- Rename function generate_rarp to virtio_notify_peers, replace
  'virtnet_' with 'virtio_'.
- Add comment for state_lock.
- Typo fix and comment improvement.

v2:
- Use spaces instead of tabs between the code and comments.
- Remove unnecessary parentheses.
- Use rte_pktmbuf_mtod directly to get eth_hdr addr.
- Fix virtio_dev_pause return value check.

Xiao Wang (5):
  net/virtio: make control queue thread-safe
  net/virtio: add packet injection method
  net: add a helper for making RARP packet
  vhost: use lib API to make RARP packet
  net/virtio: support GUEST ANNOUNCE

 drivers/net/virtio/virtio_ethdev.c      | 118 +++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h      |   6 ++
 drivers/net/virtio/virtio_pci.h         |   7 ++
 drivers/net/virtio/virtio_rxtx.c        |   3 +-
 drivers/net/virtio/virtio_rxtx.h        |   1 +
 drivers/net/virtio/virtio_rxtx_simple.c |   2 +-
 drivers/net/virtio/virtqueue.h          |  11 +++
 lib/Makefile                            |   3 +-
 lib/librte_net/Makefile                 |   1 +
 lib/librte_net/rte_arp.c                |  42 ++++++++++++
 lib/librte_net/rte_arp.h                |  17 +++++
 lib/librte_net/rte_net_version.map      |   6 ++
 lib/librte_vhost/Makefile               |   2 +-
 lib/librte_vhost/virtio_net.c           |  41 +----------
 14 files changed, 213 insertions(+), 47 deletions(-)
 create mode 100644 lib/librte_net/rte_arp.c

-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v10 1/5] net/virtio: make control queue thread-safe
  2018-01-10  1:23                                   ` [dpdk-dev] [PATCH v10 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2018-01-10  1:23                                     ` Xiao Wang
  2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 2/5] net/virtio: add packet injection method Xiao Wang
                                                       ` (3 subsequent siblings)
  4 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-10  1:23 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, maxime.coquelin, Xiao Wang

The virtio_send_command function may be called from app's configuration
routine, but also from an interrupt handler called when live migration is
done on the backup side. So this patch makes control queue thread-safe
first.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/net/virtio/virtio_ethdev.c | 7 ++++++-
 drivers/net/virtio/virtio_rxtx.c   | 1 +
 drivers/net/virtio/virtio_rxtx.h   | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 21f2131a9..4e613ce30 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -148,6 +148,8 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
 		return -1;
 	}
+
+	rte_spinlock_lock(&cvq->lock);
 	vq = cvq->vq;
 	head = vq->vq_desc_head_idx;
 
@@ -155,8 +157,10 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 		"vq->hw->cvq = %p vq = %p",
 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
 
-	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
+	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
+		rte_spinlock_unlock(&cvq->lock);
 		return -1;
+	}
 
 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
 		sizeof(struct virtio_pmd_ctrl));
@@ -232,6 +236,7 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 
 	result = cvq->virtio_net_hdr_mz->addr;
 
+	rte_spinlock_unlock(&cvq->lock);
 	return result->status;
 }
 
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 994458ced..265debf20 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -378,6 +378,7 @@ virtio_dev_cq_start(struct rte_eth_dev *dev)
 	struct virtio_hw *hw = dev->data->dev_private;
 
 	if (hw->cvq && hw->cvq->vq) {
+		rte_spinlock_init(&hw->cvq->lock);
 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
 	}
 }
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index ca546ccaa..49e9d98ee 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -55,6 +55,7 @@ struct virtnet_ctl {
 	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
 	uint16_t port_id;               /**< Device port identifier. */
 	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring. */
+	rte_spinlock_t lock;              /**< spinlock for control queue. */
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v10 2/5] net/virtio: add packet injection method
  2018-01-10  1:23                                   ` [dpdk-dev] [PATCH v10 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 1/5] net/virtio: make control queue thread-safe Xiao Wang
@ 2018-01-10  1:23                                     ` Xiao Wang
  2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP packet Xiao Wang
                                                       ` (2 subsequent siblings)
  4 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-10  1:23 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, maxime.coquelin, Xiao Wang

This patch adds dev_pause, dev_resume and inject_pkts APIs to allow
driver to pause the worker threads and inject special packets into
Tx queue. The next patch will be based on this.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/net/virtio/virtio_ethdev.c      | 56 +++++++++++++++++++++++++++++++++
 drivers/net/virtio/virtio_ethdev.h      |  5 +++
 drivers/net/virtio/virtio_pci.h         |  7 +++++
 drivers/net/virtio/virtio_rxtx.c        |  2 +-
 drivers/net/virtio/virtio_rxtx_simple.c |  2 +-
 5 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 4e613ce30..e8ff1e449 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -26,6 +26,7 @@
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_dev.h>
+#include <rte_cycles.h>
 
 #include "virtio_ethdev.h"
 #include "virtio_pci.h"
@@ -1220,6 +1221,57 @@ virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
 	return 0;
 }
 
+int
+virtio_dev_pause(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	rte_spinlock_lock(&hw->state_lock);
+
+	if (hw->started == 0) {
+		/* Device is just stopped. */
+		rte_spinlock_unlock(&hw->state_lock);
+		return -1;
+	}
+	hw->started = 0;
+	/*
+	 * Prevent the worker threads from touching queues to avoid contention,
+	 * 1 ms should be enough for the ongoing Tx function to finish.
+	 */
+	rte_delay_ms(1);
+	return 0;
+}
+
+/*
+ * Recover hw state to let the worker threads continue.
+ */
+void
+virtio_dev_resume(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	hw->started = 1;
+	rte_spinlock_unlock(&hw->state_lock);
+}
+
+/*
+ * Should be called only after device is paused.
+ */
+int
+virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
+		int nb_pkts)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_tx *txvq = dev->data->tx_queues[0];
+	int ret;
+
+	hw->inject_pkts = tx_pkts;
+	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
+	hw->inject_pkts = NULL;
+
+	return ret;
+}
+
 /*
  * Process Virtio Config changed interrupt and call the callback
  * if link state changed.
@@ -1757,6 +1809,8 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 			return -EBUSY;
 		}
 
+	rte_spinlock_init(&hw->state_lock);
+
 	hw->use_simple_rx = 1;
 	hw->use_simple_tx = 1;
 
@@ -1923,12 +1977,14 @@ virtio_dev_stop(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
+	rte_spinlock_lock(&hw->state_lock);
 	if (intr_conf->lsc || intr_conf->rxq)
 		virtio_intr_disable(dev);
 
 	hw->started = 0;
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
+	rte_spinlock_unlock(&hw->state_lock);
 }
 
 static int
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 765d249e6..69b30b7e1 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -92,4 +92,9 @@ int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
 
 void virtio_interrupt_handler(void *param);
 
+int virtio_dev_pause(struct rte_eth_dev *dev);
+void virtio_dev_resume(struct rte_eth_dev *dev);
+int virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
+		int nb_pkts);
+
 #endif /* _VIRTIO_ETHDEV_H_ */
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index fb1f6a9ec..9d810a599 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -241,6 +241,13 @@ struct virtio_hw {
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	void	    *virtio_user_dev;
+	/*
+	 * App management thread and virtio interrupt handler thread
+	 * both can change device state, this lock is meant to avoid
+	 * such a contention.
+	 */
+	rte_spinlock_t state_lock;
+	struct rte_mbuf **inject_pkts;
 
 	struct virtqueue **vqs;
 };
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 265debf20..80e996d06 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -988,7 +988,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	uint16_t nb_used, nb_tx = 0;
 	int error;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
 		return nb_tx;
 
 	if (unlikely(nb_pkts < 1))
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index 8ef3c0c04..98a9da5d8 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -70,7 +70,7 @@ virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1;
 	uint16_t nb_tx = 0;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
 		return nb_tx;
 
 	nb_used = VIRTQUEUE_NUSED(vq);
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP packet
  2018-01-10  1:23                                   ` [dpdk-dev] [PATCH v10 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 1/5] net/virtio: make control queue thread-safe Xiao Wang
  2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 2/5] net/virtio: add packet injection method Xiao Wang
@ 2018-01-10  1:23                                     ` Xiao Wang
  2018-01-10 13:06                                       ` Yuanhan Liu
                                                         ` (2 more replies)
  2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 4/5] vhost: use lib API to make RARP packet Xiao Wang
  2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 5/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  4 siblings, 3 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-10  1:23 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, maxime.coquelin, Xiao Wang

Suggested-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_net/Makefile            |  1 +
 lib/librte_net/rte_arp.c           | 42 ++++++++++++++++++++++++++++++++++++++
 lib/librte_net/rte_arp.h           | 17 +++++++++++++++
 lib/librte_net/rte_net_version.map |  6 ++++++
 4 files changed, 66 insertions(+)
 create mode 100644 lib/librte_net/rte_arp.c

diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index 5e8a76b68..ab290c382 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -13,6 +13,7 @@ LIBABIVER := 1
 
 SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
 SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
+SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_arp.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_esp.h
diff --git a/lib/librte_net/rte_arp.c b/lib/librte_net/rte_arp.c
new file mode 100644
index 000000000..d7223b044
--- /dev/null
+++ b/lib/librte_net/rte_arp.c
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <arpa/inet.h>
+
+#include <rte_arp.h>
+
+#define RARP_PKT_SIZE	64
+int
+rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac)
+{
+	struct ether_hdr *eth_hdr;
+	struct arp_hdr *rarp;
+
+	if (mbuf->buf_len < RARP_PKT_SIZE)
+		return -1;
+
+	/* Ethernet header. */
+	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
+	ether_addr_copy(mac, &eth_hdr->s_addr);
+	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
+
+	/* RARP header. */
+	rarp = (struct arp_hdr *)(eth_hdr + 1);
+	rarp->arp_hrd = htons(ARP_HRD_ETHER);
+	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
+	rarp->arp_hln = ETHER_ADDR_LEN;
+	rarp->arp_pln = 4;
+	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
+
+	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
+	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
+	memset(&rarp->arp_data.arp_sip, 0x00, 4);
+	memset(&rarp->arp_data.arp_tip, 0x00, 4);
+
+	mbuf->data_len = RARP_PKT_SIZE;
+	mbuf->pkt_len = RARP_PKT_SIZE;
+
+	return 0;
+}
diff --git a/lib/librte_net/rte_arp.h b/lib/librte_net/rte_arp.h
index 183641874..dad7423ad 100644
--- a/lib/librte_net/rte_arp.h
+++ b/lib/librte_net/rte_arp.h
@@ -76,6 +76,23 @@ struct arp_hdr {
 	struct arp_ipv4 arp_data;
 } __attribute__((__packed__));
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Make a RARP packet based on MAC addr.
+ *
+ * @param mbuf
+ *   Pointer to the rte_mbuf structure
+ * @param mac
+ *   Pointer to the MAC addr
+ *
+ * @return
+ *   - 0 on success, negative on error
+ */
+int
+rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map
index 687c40eaf..213e6fd32 100644
--- a/lib/librte_net/rte_net_version.map
+++ b/lib/librte_net/rte_net_version.map
@@ -12,3 +12,9 @@ DPDK_17.05 {
 	rte_net_crc_set_alg;
 
 } DPDK_16.11;
+
+EXPERIMENTAL {
+	global:
+
+	rte_net_make_rarp_packet;
+} DPDK_17.05;
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v10 4/5] vhost: use lib API to make RARP packet
  2018-01-10  1:23                                   ` [dpdk-dev] [PATCH v10 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
                                                       ` (2 preceding siblings ...)
  2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP packet Xiao Wang
@ 2018-01-10  1:23                                     ` Xiao Wang
  2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 5/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  4 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-10  1:23 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, maxime.coquelin, Xiao Wang

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/Makefile                  |  3 ++-
 lib/librte_vhost/Makefile     |  2 +-
 lib/librte_vhost/virtio_net.c | 41 +----------------------------------------
 3 files changed, 4 insertions(+), 42 deletions(-)

diff --git a/lib/Makefile b/lib/Makefile
index 420270252..679912a28 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -31,7 +31,8 @@ DEPDIRS-librte_security += librte_cryptodev
 DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += librte_eventdev
 DEPDIRS-librte_eventdev := librte_eal librte_ring librte_ether librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
-DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether
+DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether \
+			librte_net
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
 DEPDIRS-librte_hash := librte_eal librte_ring
 DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 3b8cd27a3..065d5c469 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -17,7 +17,7 @@ LDLIBS += -lpthread
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
 LDLIBS += -lnuma
 endif
-LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 2227ced88..f58efc0e6 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -813,45 +813,6 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
 	}
 }
 
-#define RARP_PKT_SIZE	64
-
-static int
-make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
-{
-	struct ether_hdr *eth_hdr;
-	struct arp_hdr  *rarp;
-
-	if (rarp_mbuf->buf_len < 64) {
-		RTE_LOG(WARNING, VHOST_DATA,
-			"failed to make RARP; mbuf size too small %u (< %d)\n",
-			rarp_mbuf->buf_len, RARP_PKT_SIZE);
-		return -1;
-	}
-
-	/* Ethernet header. */
-	eth_hdr = rte_pktmbuf_mtod_offset(rarp_mbuf, struct ether_hdr *, 0);
-	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
-	ether_addr_copy(mac, &eth_hdr->s_addr);
-	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
-
-	/* RARP header. */
-	rarp = (struct arp_hdr *)(eth_hdr + 1);
-	rarp->arp_hrd = htons(ARP_HRD_ETHER);
-	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
-	rarp->arp_hln = ETHER_ADDR_LEN;
-	rarp->arp_pln = 4;
-	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
-
-	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
-	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
-	memset(&rarp->arp_data.arp_sip, 0x00, 4);
-	memset(&rarp->arp_data.arp_tip, 0x00, 4);
-
-	rarp_mbuf->pkt_len  = rarp_mbuf->data_len = RARP_PKT_SIZE;
-
-	return 0;
-}
-
 static __rte_always_inline void
 put_zmbuf(struct zcopy_mbuf *zmbuf)
 {
@@ -1214,7 +1175,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			return 0;
 		}
 
-		if (make_rarp_packet(rarp_mbuf, &dev->mac)) {
+		if (rte_net_make_rarp_packet(rarp_mbuf, &dev->mac) < 0) {
 			rte_pktmbuf_free(rarp_mbuf);
 			rarp_mbuf = NULL;
 		} else {
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v10 5/5] net/virtio: support GUEST ANNOUNCE
  2018-01-10  1:23                                   ` [dpdk-dev] [PATCH v10 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
                                                       ` (3 preceding siblings ...)
  2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 4/5] vhost: use lib API to make RARP packet Xiao Wang
@ 2018-01-10  1:23                                     ` Xiao Wang
  4 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-10  1:23 UTC (permalink / raw)
  To: yliu, thomas; +Cc: tiwei.bie, dev, stephen, maxime.coquelin, Xiao Wang

When live migration is done, for the backup VM, either the virtio
frontend or the vhost backend needs to send out gratuitous RARP packet
to announce its new network location.

This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
migration scenario where the vhost backend doesn't have the ability to
generate RARP packet.

Brief introduction of the work flow:
1. QEMU finishes live migration, pokes the backup VM with an interrupt.
2. Virtio interrupt handler reads out the interrupt status value, and
   realizes it needs to send out RARP packet to announce its location.
3. Pause device to stop worker thread touching the queues.
4. Inject a RARP packet into a Tx Queue.
5. Ack the interrupt via control queue.
6. Resume device to continue packet processing.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/net/virtio/virtio_ethdev.c | 55 ++++++++++++++++++++++++++++++++++++--
 drivers/net/virtio/virtio_ethdev.h |  1 +
 drivers/net/virtio/virtqueue.h     | 11 ++++++++
 3 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index e8ff1e449..3f616453f 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -19,6 +19,8 @@
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_arp.h>
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_cpuflags.h>
@@ -78,6 +80,9 @@ static int virtio_dev_queue_stats_mapping_set(
 	uint8_t stat_idx,
 	uint8_t is_rx);
 
+static void virtio_notify_peers(struct rte_eth_dev *dev);
+static void virtio_ack_link_announce(struct rte_eth_dev *dev);
+
 /*
  * The set of PCI devices this driver supports
  */
@@ -1272,9 +1277,51 @@ virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
 	return ret;
 }
 
+static void
+virtio_notify_peers(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
+	struct rte_mbuf *rarp_mbuf;
+
+	rarp_mbuf = rte_pktmbuf_alloc(rxvq->mpool);
+	if (rarp_mbuf == NULL) {
+		PMD_DRV_LOG(ERR, "mbuf allocate failed");
+		return;
+	}
+
+	if (rte_net_make_rarp_packet(rarp_mbuf,
+			(struct ether_addr *)hw->mac_addr) < 0) {
+		rte_pktmbuf_free(rarp_mbuf);
+		return;
+	}
+
+	/* If virtio port just stopped, no need to send RARP */
+	if (virtio_dev_pause(dev) < 0) {
+		rte_pktmbuf_free(rarp_mbuf);
+		return;
+	}
+
+	virtio_inject_pkts(dev, &rarp_mbuf, 1);
+	virtio_dev_resume(dev);
+}
+
+static void
+virtio_ack_link_announce(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
+
+	virtio_send_command(hw->cvq, &ctrl, NULL, 0);
+}
+
 /*
- * Process Virtio Config changed interrupt and call the callback
- * if link state changed.
+ * Process virtio config changed interrupt. Call the callback
+ * if link state changed, generate gratuitous RARP packet if
+ * the status indicates an ANNOUNCE.
  */
 void
 virtio_interrupt_handler(void *param)
@@ -1297,6 +1344,10 @@ virtio_interrupt_handler(void *param)
 						      NULL, NULL);
 	}
 
+	if (isr & VIRTIO_NET_S_ANNOUNCE) {
+		virtio_notify_peers(dev);
+		virtio_ack_link_announce(dev);
+	}
 }
 
 /* set rx and tx handlers according to what is supported */
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 69b30b7e1..09ebc5fb5 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -38,6 +38,7 @@
 	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
 	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
 	 1u << VIRTIO_NET_F_MTU	| \
+	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |	\
 	 1u << VIRTIO_RING_F_INDIRECT_DESC |    \
 	 1ULL << VIRTIO_F_VERSION_1       |	\
 	 1ULL << VIRTIO_F_IOMMU_PLATFORM)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 1482a951d..60df359b3 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -129,6 +129,17 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 #define VIRTIO_NET_CTRL_VLAN_DEL 1
 
+/*
+ * Control link announce acknowledgement
+ *
+ * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
+ * driver has received the notification; device would clear the
+ * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
+ * this command.
+ */
+#define VIRTIO_NET_CTRL_ANNOUNCE     3
+#define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
+
 struct virtio_net_ctrl_hdr {
 	uint8_t class;
 	uint8_t cmd;
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP packet
  2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP packet Xiao Wang
@ 2018-01-10 13:06                                       ` Yuanhan Liu
  2018-01-10 14:10                                         ` Thomas Monjalon
  2018-01-16  9:01                                       ` Olivier Matz
  2018-01-16 21:40                                       ` [dpdk-dev] [PATCH v11 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2 siblings, 1 reply; 112+ messages in thread
From: Yuanhan Liu @ 2018-01-10 13:06 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: Xiao Wang, tiwei.bie, dev, stephen, maxime.coquelin

Thomas, look good to you?

	--yliu

On Wed, Jan 10, 2018 at 09:23:54AM +0800, Xiao Wang wrote:
> Suggested-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/librte_net/Makefile            |  1 +
>  lib/librte_net/rte_arp.c           | 42 ++++++++++++++++++++++++++++++++++++++
>  lib/librte_net/rte_arp.h           | 17 +++++++++++++++
>  lib/librte_net/rte_net_version.map |  6 ++++++
>  4 files changed, 66 insertions(+)
>  create mode 100644 lib/librte_net/rte_arp.c
> 
> diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
> index 5e8a76b68..ab290c382 100644
> --- a/lib/librte_net/Makefile
> +++ b/lib/librte_net/Makefile
> @@ -13,6 +13,7 @@ LIBABIVER := 1
>  
>  SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
>  SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
> +SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_arp.c
>  
>  # install includes
>  SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_esp.h
> diff --git a/lib/librte_net/rte_arp.c b/lib/librte_net/rte_arp.c
> new file mode 100644
> index 000000000..d7223b044
> --- /dev/null
> +++ b/lib/librte_net/rte_arp.c
> @@ -0,0 +1,42 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2018 Intel Corporation
> + */
> +
> +#include <arpa/inet.h>
> +
> +#include <rte_arp.h>
> +
> +#define RARP_PKT_SIZE	64
> +int
> +rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac)
> +{
> +	struct ether_hdr *eth_hdr;
> +	struct arp_hdr *rarp;
> +
> +	if (mbuf->buf_len < RARP_PKT_SIZE)
> +		return -1;
> +
> +	/* Ethernet header. */
> +	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
> +	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
> +	ether_addr_copy(mac, &eth_hdr->s_addr);
> +	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
> +
> +	/* RARP header. */
> +	rarp = (struct arp_hdr *)(eth_hdr + 1);
> +	rarp->arp_hrd = htons(ARP_HRD_ETHER);
> +	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
> +	rarp->arp_hln = ETHER_ADDR_LEN;
> +	rarp->arp_pln = 4;
> +	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
> +
> +	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
> +	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
> +	memset(&rarp->arp_data.arp_sip, 0x00, 4);
> +	memset(&rarp->arp_data.arp_tip, 0x00, 4);
> +
> +	mbuf->data_len = RARP_PKT_SIZE;
> +	mbuf->pkt_len = RARP_PKT_SIZE;
> +
> +	return 0;
> +}
> diff --git a/lib/librte_net/rte_arp.h b/lib/librte_net/rte_arp.h
> index 183641874..dad7423ad 100644
> --- a/lib/librte_net/rte_arp.h
> +++ b/lib/librte_net/rte_arp.h
> @@ -76,6 +76,23 @@ struct arp_hdr {
>  	struct arp_ipv4 arp_data;
>  } __attribute__((__packed__));
>  
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Make a RARP packet based on MAC addr.
> + *
> + * @param mbuf
> + *   Pointer to the rte_mbuf structure
> + * @param mac
> + *   Pointer to the MAC addr
> + *
> + * @return
> + *   - 0 on success, negative on error
> + */
> +int
> +rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map
> index 687c40eaf..213e6fd32 100644
> --- a/lib/librte_net/rte_net_version.map
> +++ b/lib/librte_net/rte_net_version.map
> @@ -12,3 +12,9 @@ DPDK_17.05 {
>  	rte_net_crc_set_alg;
>  
>  } DPDK_16.11;
> +
> +EXPERIMENTAL {
> +	global:
> +
> +	rte_net_make_rarp_packet;
> +} DPDK_17.05;
> -- 
> 2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP packet
  2018-01-10 13:06                                       ` Yuanhan Liu
@ 2018-01-10 14:10                                         ` Thomas Monjalon
  0 siblings, 0 replies; 112+ messages in thread
From: Thomas Monjalon @ 2018-01-10 14:10 UTC (permalink / raw)
  To: Yuanhan Liu
  Cc: Xiao Wang, tiwei.bie, dev, stephen, maxime.coquelin, Olivier Matz

10/01/2018 14:06, Yuanhan Liu:
> Thomas, look good to you?

The format looks good, yes.

Cc Olivier, the maintainer.

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP packet
  2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP packet Xiao Wang
  2018-01-10 13:06                                       ` Yuanhan Liu
@ 2018-01-16  9:01                                       ` Olivier Matz
  2018-01-16  9:43                                         ` Wang, Xiao W
  2018-01-16 21:40                                       ` [dpdk-dev] [PATCH v11 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2 siblings, 1 reply; 112+ messages in thread
From: Olivier Matz @ 2018-01-16  9:01 UTC (permalink / raw)
  To: Xiao Wang; +Cc: yliu, thomas, tiwei.bie, dev, stephen, maxime.coquelin

Hi Xiao,

Please find a few comments below.

On Wed, Jan 10, 2018 at 09:23:54AM +0800, Xiao Wang wrote:
> Suggested-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/librte_net/Makefile            |  1 +
>  lib/librte_net/rte_arp.c           | 42 ++++++++++++++++++++++++++++++++++++++
>  lib/librte_net/rte_arp.h           | 17 +++++++++++++++
>  lib/librte_net/rte_net_version.map |  6 ++++++
>  4 files changed, 66 insertions(+)
>  create mode 100644 lib/librte_net/rte_arp.c
> 
> diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
> index 5e8a76b68..ab290c382 100644
> --- a/lib/librte_net/Makefile
> +++ b/lib/librte_net/Makefile
> @@ -13,6 +13,7 @@ LIBABIVER := 1
>  
>  SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
>  SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
> +SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_arp.c
>  
>  # install includes
>  SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_esp.h
> diff --git a/lib/librte_net/rte_arp.c b/lib/librte_net/rte_arp.c
> new file mode 100644
> index 000000000..d7223b044
> --- /dev/null
> +++ b/lib/librte_net/rte_arp.c
> @@ -0,0 +1,42 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2018 Intel Corporation
> + */
> +
> +#include <arpa/inet.h>
> +
> +#include <rte_arp.h>
> +
> +#define RARP_PKT_SIZE	64
> +int
> +rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac)
> +{
> +	struct ether_hdr *eth_hdr;
> +	struct arp_hdr *rarp;
> +
> +	if (mbuf->buf_len < RARP_PKT_SIZE)
> +		return -1;
> +
> +	/* Ethernet header. */
> +	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
> +	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
> +	ether_addr_copy(mac, &eth_hdr->s_addr);
> +	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
> +
> +	/* RARP header. */
> +	rarp = (struct arp_hdr *)(eth_hdr + 1);
> +	rarp->arp_hrd = htons(ARP_HRD_ETHER);
> +	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
> +	rarp->arp_hln = ETHER_ADDR_LEN;
> +	rarp->arp_pln = 4;
> +	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
> +
> +	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
> +	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
> +	memset(&rarp->arp_data.arp_sip, 0x00, 4);
> +	memset(&rarp->arp_data.arp_tip, 0x00, 4);
> +
> +	mbuf->data_len = RARP_PKT_SIZE;
> +	mbuf->pkt_len = RARP_PKT_SIZE;
> +
> +	return 0;
> +}

You don't check that there is enough tailroom to write the packet data.
Also, nothing verifies that the mbuf passed to the function is empty.
I suggest doing the allocation in this function; what do you think?

You can also use rte_pktmbuf_append() to check for the tailroom and
update data_len/pkt_len:

	m = rte_pktmbuf_alloc();
	if (m == NULL)
		return NULL;
	eth_hdr = rte_pktmbuf_append(m, RARP_PKT_SIZE);
	if (eth_hdr == NULL) {
		rte_pktmbuf_free(m);
		return NULL;
	}
	eth_hdr->... = ...;
	...
	rarp = (struct arp_hdr *)(eth_hdr + 1);
	rarp->... = ...;
	...

	return m;

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP packet
  2018-01-16  9:01                                       ` Olivier Matz
@ 2018-01-16  9:43                                         ` Wang, Xiao W
  2018-01-16 10:42                                           ` Olivier Matz
  0 siblings, 1 reply; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-16  9:43 UTC (permalink / raw)
  To: Olivier Matz; +Cc: yliu, thomas, Bie, Tiwei, dev, stephen, maxime.coquelin

Hi Olivier,

> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Tuesday, January 16, 2018 5:01 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: yliu@fridaylinux.org; thomas@monjalon.net; Bie, Tiwei
> <tiwei.bie@intel.com>; dev@dpdk.org; stephen@networkplumber.org;
> maxime.coquelin@redhat.com
> Subject: Re: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP
> packet
> 
> Hi Xiao,
> 
> Please find few comments below.
> 
> On Wed, Jan 10, 2018 at 09:23:54AM +0800, Xiao Wang wrote:
> > Suggested-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > ---
> >  lib/librte_net/Makefile            |  1 +
> >  lib/librte_net/rte_arp.c           | 42
> ++++++++++++++++++++++++++++++++++++++
> >  lib/librte_net/rte_arp.h           | 17 +++++++++++++++
> >  lib/librte_net/rte_net_version.map |  6 ++++++
> >  4 files changed, 66 insertions(+)
> >  create mode 100644 lib/librte_net/rte_arp.c
> >
> > diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
> > index 5e8a76b68..ab290c382 100644
> > --- a/lib/librte_net/Makefile
> > +++ b/lib/librte_net/Makefile
> > @@ -13,6 +13,7 @@ LIBABIVER := 1
> >
> >  SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
> >  SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
> > +SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_arp.c
> >
> >  # install includes
> >  SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h
> rte_udp.h rte_esp.h
> > diff --git a/lib/librte_net/rte_arp.c b/lib/librte_net/rte_arp.c
> > new file mode 100644
> > index 000000000..d7223b044
> > --- /dev/null
> > +++ b/lib/librte_net/rte_arp.c
> > @@ -0,0 +1,42 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2018 Intel Corporation
> > + */
> > +
> > +#include <arpa/inet.h>
> > +
> > +#include <rte_arp.h>
> > +
> > +#define RARP_PKT_SIZE	64
> > +int
> > +rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr
> *mac)
> > +{
> > +	struct ether_hdr *eth_hdr;
> > +	struct arp_hdr *rarp;
> > +
> > +	if (mbuf->buf_len < RARP_PKT_SIZE)
> > +		return -1;
> > +
> > +	/* Ethernet header. */
> > +	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
> > +	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
> > +	ether_addr_copy(mac, &eth_hdr->s_addr);
> > +	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
> > +
> > +	/* RARP header. */
> > +	rarp = (struct arp_hdr *)(eth_hdr + 1);
> > +	rarp->arp_hrd = htons(ARP_HRD_ETHER);
> > +	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
> > +	rarp->arp_hln = ETHER_ADDR_LEN;
> > +	rarp->arp_pln = 4;
> > +	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
> > +
> > +	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
> > +	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
> > +	memset(&rarp->arp_data.arp_sip, 0x00, 4);
> > +	memset(&rarp->arp_data.arp_tip, 0x00, 4);
> > +
> > +	mbuf->data_len = RARP_PKT_SIZE;
> > +	mbuf->pkt_len = RARP_PKT_SIZE;
> > +
> > +	return 0;
> > +}
> 
> You don't check that there is enough tailroom to write the packet data.

Yes, tailroom can be used.

> Also, nothing verifies that the mbuf passed to the function is empty.
> I suggest to do the allocation in this function, what do you think?
>

I agree to allocate in this function and let it do all the checks.
 
> You can also use rte_pktmbuf_append() to check for the tailroom and
> update data_len/pkt_len:
> 
> 	m = rte_pktmbuf_alloc();
> 	if (m == NULL)
> 		return NULL;
> 	eth_hdr = rte_pktmbuf_append(m, RARP_PKT_SIZE);

When data_len is not enough, we need to rte_pktmbuf_append(m, RARP_PKT_SIZE - m->data_len);

> 	if (eth_hdr == NULL) {
> 		m_freem(m);
> 		return NULL;
> 	}
> 	eth_hdr->... = ...;
> 	...
> 	rarp = (struct arp_hdr *)(eth_hdr + 1);
> 	rarp->... = ...;
> 	...
> 
> 	return m;
> 

Will change it in next version, thanks for the comments.

BRs,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP packet
  2018-01-16  9:43                                         ` Wang, Xiao W
@ 2018-01-16 10:42                                           ` Olivier Matz
  2018-01-16 11:03                                             ` Wang, Xiao W
  2018-01-16 11:42                                             ` Wang, Xiao W
  0 siblings, 2 replies; 112+ messages in thread
From: Olivier Matz @ 2018-01-16 10:42 UTC (permalink / raw)
  To: Wang, Xiao W; +Cc: yliu, thomas, Bie, Tiwei, dev, stephen, maxime.coquelin

Hi Xiao,

On Tue, Jan 16, 2018 at 09:43:43AM +0000, Wang, Xiao W wrote:
> Hi Olivier,
> > You can also use rte_pktmbuf_append() to check for the tailroom and
> > update data_len/pkt_len:
> > 
> > 	m = rte_pktmbuf_alloc();
> > 	if (m == NULL)
> > 		return NULL;
> > 	eth_hdr = rte_pktmbuf_append(m, RARP_PKT_SIZE);
> 
> When data_len is not enough, we need to rte_pktmbuf_append(m, RARP_PKT_SIZE - m->data_len);

Sorry, I don't get your point here.

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP packet
  2018-01-16 10:42                                           ` Olivier Matz
@ 2018-01-16 11:03                                             ` Wang, Xiao W
  2018-01-16 11:42                                             ` Wang, Xiao W
  1 sibling, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-16 11:03 UTC (permalink / raw)
  To: Olivier Matz; +Cc: yliu, thomas, Bie, Tiwei, dev, stephen, maxime.coquelin

Hi Olivier,

> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Tuesday, January 16, 2018 6:43 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: yliu@fridaylinux.org; thomas@monjalon.net; Bie, Tiwei
> <tiwei.bie@intel.com>; dev@dpdk.org; stephen@networkplumber.org;
> maxime.coquelin@redhat.com
> Subject: Re: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP
> packet
> 
> Hi Xiao,
> 
> On Tue, Jan 16, 2018 at 09:43:43AM +0000, Wang, Xiao W wrote:
> > Hi Olivier,
> > > You can also use rte_pktmbuf_append() to check for the tailroom and
> > > update data_len/pkt_len:
> > >
> > > 	m = rte_pktmbuf_alloc();

I just realized that if we let this function to allocate mbuf, it may restrict this api's applicability.
E.g. the caller just has a mbuf, without a mempool.
How do you think?

> > > 	if (m == NULL)
> > > 		return NULL;
> > > 	eth_hdr = rte_pktmbuf_append(m, RARP_PKT_SIZE);
> >
> > When data_len is not enough, we need to rte_pktmbuf_append(m,
> RARP_PKT_SIZE - m->data_len);
> 
> Sorry, I don't get your point here.

I mean we just need to extend the data_len by "RARP_PKT_SIZE - m->data_len" when the room is not big enough.

BRs,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP packet
  2018-01-16 10:42                                           ` Olivier Matz
  2018-01-16 11:03                                             ` Wang, Xiao W
@ 2018-01-16 11:42                                             ` Wang, Xiao W
  1 sibling, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-16 11:42 UTC (permalink / raw)
  To: Olivier Matz; +Cc: yliu, thomas, Bie, Tiwei, dev, stephen, maxime.coquelin



> -----Original Message-----
> From: Wang, Xiao W
> Sent: Tuesday, January 16, 2018 7:03 PM
> To: 'Olivier Matz' <olivier.matz@6wind.com>
> Cc: yliu@fridaylinux.org; thomas@monjalon.net; Bie, Tiwei
> <tiwei.bie@intel.com>; dev@dpdk.org; stephen@networkplumber.org;
> maxime.coquelin@redhat.com
> Subject: RE: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP
> packet
> 
> Hi Olivier,
> 
> > -----Original Message-----
> > From: Olivier Matz [mailto:olivier.matz@6wind.com]
> > Sent: Tuesday, January 16, 2018 6:43 PM
> > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > Cc: yliu@fridaylinux.org; thomas@monjalon.net; Bie, Tiwei
> > <tiwei.bie@intel.com>; dev@dpdk.org; stephen@networkplumber.org;
> > maxime.coquelin@redhat.com
> > Subject: Re: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP
> > packet
> >
> > Hi Xiao,
> >
> > On Tue, Jan 16, 2018 at 09:43:43AM +0000, Wang, Xiao W wrote:
> > > Hi Olivier,
> > > > You can also use rte_pktmbuf_append() to check for the tailroom and
> > > > update data_len/pkt_len:
> > > >
> > > > 	m = rte_pktmbuf_alloc();
> 
> I just realized that if we let this function to allocate mbuf, it may restrict this
> api's applicability.
> E.g. the caller just has a mbuf, without a mempool.
> How do you think?
> 
> > > > 	if (m == NULL)
> > > > 		return NULL;
> > > > 	eth_hdr = rte_pktmbuf_append(m, RARP_PKT_SIZE);
> > >
> > > When data_len is not enough, we need to rte_pktmbuf_append(m,
> > RARP_PKT_SIZE - m->data_len);
> >
> > Sorry, I don't get your point here.
> 
> I mean we just need to extend the data_len by "RARP_PKT_SIZE - m-
> >data_len" when the room is not big enough.

OK, in your sample code the mbuf comes from rte_pktmbuf_alloc(), so it is already reset and we just append RARP_PKT_SIZE. I see your point now.

For the mbuf allocation, we can let this function do both the allocation and the content filling. If the app has special needs, e.g. a chained mbuf,
then the app can fill the packet by itself.

> 
> BRs,
> Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v11 3/5] net: add a helper for making RARP packet
  2018-01-16 21:41                                         ` [dpdk-dev] [PATCH v11 3/5] net: add a helper for making RARP packet Xiao Wang
@ 2018-01-16 14:29                                           ` Olivier Matz
  0 siblings, 0 replies; 112+ messages in thread
From: Olivier Matz @ 2018-01-16 14:29 UTC (permalink / raw)
  To: Xiao Wang; +Cc: yliu, dev, thomas, tiwei.bie, stephen, maxime.coquelin

On Wed, Jan 17, 2018 at 05:41:01AM +0800, Xiao Wang wrote:
> Suggested-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Acked-by: Olivier Matz <olivier.matz@6wind.com>

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v11 0/5] net/virtio: support GUEST ANNOUNCE
  2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP packet Xiao Wang
  2018-01-10 13:06                                       ` Yuanhan Liu
  2018-01-16  9:01                                       ` Olivier Matz
@ 2018-01-16 21:40                                       ` Xiao Wang
  2018-01-16 21:40                                         ` [dpdk-dev] [PATCH v11 1/5] net/virtio: make control queue thread-safe Xiao Wang
                                                           ` (5 more replies)
  2 siblings, 6 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-16 21:40 UTC (permalink / raw)
  To: yliu, olivier.matz
  Cc: dev, thomas, tiwei.bie, stephen, maxime.coquelin, Xiao Wang

When live migration is finished, the backup VM needs to proactively announce
its new location. DPDK vhost has implemented VHOST_USER_PROTOCOL_F_RARP to
generate a RARP packet to switch in dequeue path. Another method is to let
the guest proactively send out RARP packet using VIRTIO_NET_F_GUEST_ANNOUNCE
feature.

This patch set enables this feature in the virtio PMD, so that a VM running the
virtio PMD can be migrated even when vhost does not support RARP generation.

v11:
- Add check for parameter and tailroom in rte_net_make_rarp_packet.
- Allocate mbuf in rte_net_make_rarp_packet.

v10:
- Add a bold doxygen comment for the experimental function.

v9:
- Introduce function with the experimental state.

v8:
- Add a helper in lib/librte_net to make rarp packet, it's used by
  both vhost and virtio.

v7:
- Improve comment for state_lock.
- Rename spinlock variable 'sl' to 'lock'.

v6:
- Use rte_pktmbuf_alloc() instead of rte_mbuf_raw_alloc().
- Remove the 'len' parameter in calling virtio_send_command().
- Remove extra space between type and var.
- Improve comment and alignment.
- Remove the unnecessary header file.
- A better usage of 'unlikely' indication.

v5:
- Remove txvq parameter in virtio_inject_pkts.
- Zero hw->special_buf after using it.
- Return the retval of tx_pkt_burst().
- Allocate a mbuf pointer on stack directly.

v4:
- Move spinlock lock/unlock into dev_pause/resume.
- Separate out a patch for packet injection.

v3:
- Remove Tx function code duplication, use a special pointer for rarp
  injection.
- Rename function generate_rarp to virtio_notify_peers, replace
  'virtnet_' with 'virtio_'.
- Add comment for state_lock.
- Typo fix and comment improvement.

v2:
- Use spaces instead of tabs between the code and comments.
- Remove unnecessary parentheses.
- Use rte_pktmbuf_mtod directly to get eth_hdr addr.
- Fix virtio_dev_pause return value check.

Xiao Wang (5):
  net/virtio: make control queue thread-safe
  net/virtio: add packet injection method
  net: add a helper for making RARP packet
  vhost: use lib API to make RARP packet
  net/virtio: support GUEST ANNOUNCE

 drivers/net/virtio/virtio_ethdev.c      | 113 +++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h      |   6 ++
 drivers/net/virtio/virtio_pci.h         |   7 ++
 drivers/net/virtio/virtio_rxtx.c        |   3 +-
 drivers/net/virtio/virtio_rxtx.h        |   1 +
 drivers/net/virtio/virtio_rxtx_simple.c |   2 +-
 drivers/net/virtio/virtqueue.h          |  11 ++++
 lib/Makefile                            |   3 +-
 lib/librte_net/Makefile                 |   1 +
 lib/librte_net/rte_arp.c                |  50 ++++++++++++++
 lib/librte_net/rte_arp.h                |  18 +++++
 lib/librte_net/rte_net_version.map      |   6 ++
 lib/librte_vhost/Makefile               |   2 +-
 lib/librte_vhost/virtio_net.c           |  51 +-------------
 14 files changed, 219 insertions(+), 55 deletions(-)
 create mode 100644 lib/librte_net/rte_arp.c

-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v11 1/5] net/virtio: make control queue thread-safe
  2018-01-16 21:40                                       ` [dpdk-dev] [PATCH v11 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2018-01-16 21:40                                         ` Xiao Wang
  2018-01-16 21:41                                         ` [dpdk-dev] [PATCH v11 2/5] net/virtio: add packet injection method Xiao Wang
                                                           ` (4 subsequent siblings)
  5 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-16 21:40 UTC (permalink / raw)
  To: yliu, olivier.matz
  Cc: dev, thomas, tiwei.bie, stephen, maxime.coquelin, Xiao Wang

The virtio_send_command function may be called from app's configuration
routine, but also from an interrupt handler called when live migration is
done on the backup side. So this patch makes control queue thread-safe
first.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/net/virtio/virtio_ethdev.c | 7 ++++++-
 drivers/net/virtio/virtio_rxtx.c   | 1 +
 drivers/net/virtio/virtio_rxtx.h   | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 21f2131a9..4e613ce30 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -148,6 +148,8 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
 		return -1;
 	}
+
+	rte_spinlock_lock(&cvq->lock);
 	vq = cvq->vq;
 	head = vq->vq_desc_head_idx;
 
@@ -155,8 +157,10 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 		"vq->hw->cvq = %p vq = %p",
 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
 
-	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
+	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
+		rte_spinlock_unlock(&cvq->lock);
 		return -1;
+	}
 
 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
 		sizeof(struct virtio_pmd_ctrl));
@@ -232,6 +236,7 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 
 	result = cvq->virtio_net_hdr_mz->addr;
 
+	rte_spinlock_unlock(&cvq->lock);
 	return result->status;
 }
 
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 994458ced..265debf20 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -378,6 +378,7 @@ virtio_dev_cq_start(struct rte_eth_dev *dev)
 	struct virtio_hw *hw = dev->data->dev_private;
 
 	if (hw->cvq && hw->cvq->vq) {
+		rte_spinlock_init(&hw->cvq->lock);
 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
 	}
 }
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index ca546ccaa..8b1a448a0 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -55,6 +55,7 @@ struct virtnet_ctl {
 	rte_iova_t virtio_net_hdr_mem;  /**< hdr for each xmit packet */
 	uint16_t port_id;               /**< Device port identifier. */
 	const struct rte_memzone *mz;   /**< mem zone to populate CTL ring. */
+	rte_spinlock_t lock;            /**< spinlock for control queue. */
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v11 2/5] net/virtio: add packet injection method
  2018-01-16 21:40                                       ` [dpdk-dev] [PATCH v11 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-16 21:40                                         ` [dpdk-dev] [PATCH v11 1/5] net/virtio: make control queue thread-safe Xiao Wang
@ 2018-01-16 21:41                                         ` Xiao Wang
  2018-01-16 21:41                                         ` [dpdk-dev] [PATCH v11 3/5] net: add a helper for making RARP packet Xiao Wang
                                                           ` (3 subsequent siblings)
  5 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-16 21:41 UTC (permalink / raw)
  To: yliu, olivier.matz
  Cc: dev, thomas, tiwei.bie, stephen, maxime.coquelin, Xiao Wang

This patch adds dev_pause, dev_resume and inject_pkts APIs to allow
driver to pause the worker threads and inject special packets into
Tx queue. The next patch will be based on this.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/net/virtio/virtio_ethdev.c      | 56 +++++++++++++++++++++++++++++++++
 drivers/net/virtio/virtio_ethdev.h      |  5 +++
 drivers/net/virtio/virtio_pci.h         |  7 +++++
 drivers/net/virtio/virtio_rxtx.c        |  2 +-
 drivers/net/virtio/virtio_rxtx_simple.c |  2 +-
 5 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 4e613ce30..e8ff1e449 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -26,6 +26,7 @@
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_dev.h>
+#include <rte_cycles.h>
 
 #include "virtio_ethdev.h"
 #include "virtio_pci.h"
@@ -1220,6 +1221,57 @@ virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
 	return 0;
 }
 
+int
+virtio_dev_pause(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	rte_spinlock_lock(&hw->state_lock);
+
+	if (hw->started == 0) {
+		/* Device is just stopped. */
+		rte_spinlock_unlock(&hw->state_lock);
+		return -1;
+	}
+	hw->started = 0;
+	/*
+	 * Prevent the worker threads from touching queues to avoid contention,
+	 * 1 ms should be enough for the ongoing Tx function to finish.
+	 */
+	rte_delay_ms(1);
+	return 0;
+}
+
+/*
+ * Recover hw state to let the worker threads continue.
+ */
+void
+virtio_dev_resume(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+
+	hw->started = 1;
+	rte_spinlock_unlock(&hw->state_lock);
+}
+
+/*
+ * Should be called only after device is paused.
+ */
+int
+virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
+		int nb_pkts)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_tx *txvq = dev->data->tx_queues[0];
+	int ret;
+
+	hw->inject_pkts = tx_pkts;
+	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
+	hw->inject_pkts = NULL;
+
+	return ret;
+}
+
 /*
  * Process Virtio Config changed interrupt and call the callback
  * if link state changed.
@@ -1757,6 +1809,8 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 			return -EBUSY;
 		}
 
+	rte_spinlock_init(&hw->state_lock);
+
 	hw->use_simple_rx = 1;
 	hw->use_simple_tx = 1;
 
@@ -1923,12 +1977,14 @@ virtio_dev_stop(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
+	rte_spinlock_lock(&hw->state_lock);
 	if (intr_conf->lsc || intr_conf->rxq)
 		virtio_intr_disable(dev);
 
 	hw->started = 0;
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
+	rte_spinlock_unlock(&hw->state_lock);
 }
 
 static int
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 765d249e6..69b30b7e1 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -92,4 +92,9 @@ int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
 
 void virtio_interrupt_handler(void *param);
 
+int virtio_dev_pause(struct rte_eth_dev *dev);
+void virtio_dev_resume(struct rte_eth_dev *dev);
+int virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
+		int nb_pkts);
+
 #endif /* _VIRTIO_ETHDEV_H_ */
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index fb1f6a9ec..9d810a599 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -241,6 +241,13 @@ struct virtio_hw {
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	void	    *virtio_user_dev;
+	/*
+	 * App management thread and virtio interrupt handler thread
+	 * both can change device state, this lock is meant to avoid
+	 * such a contention.
+	 */
+	rte_spinlock_t state_lock;
+	struct rte_mbuf **inject_pkts;
 
 	struct virtqueue **vqs;
 };
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 265debf20..80e996d06 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -988,7 +988,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	uint16_t nb_used, nb_tx = 0;
 	int error;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
 		return nb_tx;
 
 	if (unlikely(nb_pkts < 1))
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index 8ef3c0c04..98a9da5d8 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -70,7 +70,7 @@ virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1;
 	uint16_t nb_tx = 0;
 
-	if (unlikely(hw->started == 0))
+	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
 		return nb_tx;
 
 	nb_used = VIRTQUEUE_NUSED(vq);
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v11 3/5] net: add a helper for making RARP packet
  2018-01-16 21:40                                       ` [dpdk-dev] [PATCH v11 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-16 21:40                                         ` [dpdk-dev] [PATCH v11 1/5] net/virtio: make control queue thread-safe Xiao Wang
  2018-01-16 21:41                                         ` [dpdk-dev] [PATCH v11 2/5] net/virtio: add packet injection method Xiao Wang
@ 2018-01-16 21:41                                         ` Xiao Wang
  2018-01-16 14:29                                           ` Olivier Matz
  2018-01-16 21:41                                         ` [dpdk-dev] [PATCH v11 4/5] vhost: use lib API to make " Xiao Wang
                                                           ` (2 subsequent siblings)
  5 siblings, 1 reply; 112+ messages in thread
From: Xiao Wang @ 2018-01-16 21:41 UTC (permalink / raw)
  To: yliu, olivier.matz
  Cc: dev, thomas, tiwei.bie, stephen, maxime.coquelin, Xiao Wang

Suggested-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_net/Makefile            |  1 +
 lib/librte_net/rte_arp.c           | 50 ++++++++++++++++++++++++++++++++++++++
 lib/librte_net/rte_arp.h           | 18 ++++++++++++++
 lib/librte_net/rte_net_version.map |  6 +++++
 4 files changed, 75 insertions(+)
 create mode 100644 lib/librte_net/rte_arp.c

diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index 5e8a76b68..ab290c382 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -13,6 +13,7 @@ LIBABIVER := 1
 
 SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
 SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
+SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_arp.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_esp.h
diff --git a/lib/librte_net/rte_arp.c b/lib/librte_net/rte_arp.c
new file mode 100644
index 000000000..b953bcd7e
--- /dev/null
+++ b/lib/librte_net/rte_arp.c
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <arpa/inet.h>
+
+#include <rte_arp.h>
+
+#define RARP_PKT_SIZE	64
+struct rte_mbuf *
+rte_net_make_rarp_packet(struct rte_mempool *mpool,
+		const struct ether_addr *mac)
+{
+	struct ether_hdr *eth_hdr;
+	struct arp_hdr *rarp;
+	struct rte_mbuf *mbuf;
+
+	if (mpool == NULL)
+		return NULL;
+
+	mbuf = rte_pktmbuf_alloc(mpool);
+	if (mbuf == NULL)
+		return NULL;
+
+	eth_hdr = (struct ether_hdr *)rte_pktmbuf_append(mbuf, RARP_PKT_SIZE);
+	if (eth_hdr == NULL) {
+		rte_pktmbuf_free(mbuf);
+		return NULL;
+	}
+
+	/* Ethernet header. */
+	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
+	ether_addr_copy(mac, &eth_hdr->s_addr);
+	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
+
+	/* RARP header. */
+	rarp = (struct arp_hdr *)(eth_hdr + 1);
+	rarp->arp_hrd = htons(ARP_HRD_ETHER);
+	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
+	rarp->arp_hln = ETHER_ADDR_LEN;
+	rarp->arp_pln = 4;
+	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
+
+	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
+	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
+	memset(&rarp->arp_data.arp_sip, 0x00, 4);
+	memset(&rarp->arp_data.arp_tip, 0x00, 4);
+
+	return mbuf;
+}
diff --git a/lib/librte_net/rte_arp.h b/lib/librte_net/rte_arp.h
index 183641874..457a39b15 100644
--- a/lib/librte_net/rte_arp.h
+++ b/lib/librte_net/rte_arp.h
@@ -76,6 +76,24 @@ struct arp_hdr {
 	struct arp_ipv4 arp_data;
 } __attribute__((__packed__));
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Make a RARP packet based on MAC addr.
+ *
+ * @param mpool
+ *   Pointer to the rte_mempool
+ * @param mac
+ *   Pointer to the MAC addr
+ *
+ * @return
+ *   - RARP packet pointer on success, or NULL on error
+ */
+struct rte_mbuf *
+rte_net_make_rarp_packet(struct rte_mempool *mpool,
+		const struct ether_addr *mac);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map
index 687c40eaf..213e6fd32 100644
--- a/lib/librte_net/rte_net_version.map
+++ b/lib/librte_net/rte_net_version.map
@@ -12,3 +12,9 @@ DPDK_17.05 {
 	rte_net_crc_set_alg;
 
 } DPDK_16.11;
+
+EXPERIMENTAL {
+	global:
+
+	rte_net_make_rarp_packet;
+} DPDK_17.05;
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v11 4/5] vhost: use lib API to make RARP packet
  2018-01-16 21:40                                       ` [dpdk-dev] [PATCH v11 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
                                                           ` (2 preceding siblings ...)
  2018-01-16 21:41                                         ` [dpdk-dev] [PATCH v11 3/5] net: add a helper for making RARP packet Xiao Wang
@ 2018-01-16 21:41                                         ` Xiao Wang
  2018-01-16 21:41                                         ` [dpdk-dev] [PATCH v11 5/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
  2018-01-18  3:09                                         ` [dpdk-dev] [PATCH v11 0/5] " Yuanhan Liu
  5 siblings, 0 replies; 112+ messages in thread
From: Xiao Wang @ 2018-01-16 21:41 UTC (permalink / raw)
  To: yliu, olivier.matz
  Cc: dev, thomas, tiwei.bie, stephen, maxime.coquelin, Xiao Wang

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/Makefile                  |  3 ++-
 lib/librte_vhost/Makefile     |  2 +-
 lib/librte_vhost/virtio_net.c | 51 +++----------------------------------------
 3 files changed, 6 insertions(+), 50 deletions(-)

diff --git a/lib/Makefile b/lib/Makefile
index 2cc37d633..f59855d3a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -31,7 +31,8 @@ DEPDIRS-librte_security += librte_cryptodev
 DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += librte_eventdev
 DEPDIRS-librte_eventdev := librte_eal librte_ring librte_ether librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
-DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether
+DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether \
+			librte_net
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
 DEPDIRS-librte_hash := librte_eal librte_ring
 DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 3b8cd27a3..065d5c469 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -17,7 +17,7 @@ LDLIBS += -lpthread
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
 LDLIBS += -lnuma
 endif
-LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 2227ced88..5d6960e23 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -813,45 +813,6 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
 	}
 }
 
-#define RARP_PKT_SIZE	64
-
-static int
-make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
-{
-	struct ether_hdr *eth_hdr;
-	struct arp_hdr  *rarp;
-
-	if (rarp_mbuf->buf_len < 64) {
-		RTE_LOG(WARNING, VHOST_DATA,
-			"failed to make RARP; mbuf size too small %u (< %d)\n",
-			rarp_mbuf->buf_len, RARP_PKT_SIZE);
-		return -1;
-	}
-
-	/* Ethernet header. */
-	eth_hdr = rte_pktmbuf_mtod_offset(rarp_mbuf, struct ether_hdr *, 0);
-	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
-	ether_addr_copy(mac, &eth_hdr->s_addr);
-	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
-
-	/* RARP header. */
-	rarp = (struct arp_hdr *)(eth_hdr + 1);
-	rarp->arp_hrd = htons(ARP_HRD_ETHER);
-	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
-	rarp->arp_hln = ETHER_ADDR_LEN;
-	rarp->arp_pln = 4;
-	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
-
-	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
-	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
-	memset(&rarp->arp_data.arp_sip, 0x00, 4);
-	memset(&rarp->arp_data.arp_tip, 0x00, 4);
-
-	rarp_mbuf->pkt_len  = rarp_mbuf->data_len = RARP_PKT_SIZE;
-
-	return 0;
-}
-
 static __rte_always_inline void
 put_zmbuf(struct zcopy_mbuf *zmbuf)
 {
@@ -1207,19 +1168,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			rte_atomic16_cmpset((volatile uint16_t *)
 				&dev->broadcast_rarp.cnt, 1, 0))) {
 
-		rarp_mbuf = rte_pktmbuf_alloc(mbuf_pool);
+		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
 		if (rarp_mbuf == NULL) {
 			RTE_LOG(ERR, VHOST_DATA,
-				"Failed to allocate memory for mbuf.\n");
+				"Failed to make RARP packet.\n");
 			return 0;
 		}
-
-		if (make_rarp_packet(rarp_mbuf, &dev->mac)) {
-			rte_pktmbuf_free(rarp_mbuf);
-			rarp_mbuf = NULL;
-		} else {
-			count -= 1;
-		}
+		count -= 1;
 	}
 
 	free_entries = *((volatile uint16_t *)&vq->avail->idx) -
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH v11 5/5] net/virtio: support GUEST ANNOUNCE
  2018-01-16 21:40                                       ` [dpdk-dev] [PATCH v11 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
                                                           ` (3 preceding siblings ...)
  2018-01-16 21:41                                         ` [dpdk-dev] [PATCH v11 4/5] vhost: use lib API to make " Xiao Wang
@ 2018-01-16 21:41                                         ` Xiao Wang
  2018-01-19 17:33                                           ` Ferruh Yigit
  2018-01-18  3:09                                         ` [dpdk-dev] [PATCH v11 0/5] " Yuanhan Liu
  5 siblings, 1 reply; 112+ messages in thread
From: Xiao Wang @ 2018-01-16 21:41 UTC (permalink / raw)
  To: yliu, olivier.matz
  Cc: dev, thomas, tiwei.bie, stephen, maxime.coquelin, Xiao Wang

When live migration is done, for the backup VM, either the virtio
frontend or the vhost backend needs to send out gratuitous RARP packet
to announce its new network location.

This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
migration scenario where the vhost backend doesn't have the ability to
generate RARP packet.

Brief introduction of the work flow:
1. QEMU finishes live migration, pokes the backup VM with an interrupt.
2. Virtio interrupt handler reads out the interrupt status value, and
   realizes it needs to send out RARP packet to announce its location.
3. Pause device to stop worker thread touching the queues.
4. Inject a RARP packet into a Tx Queue.
5. Ack the interrupt via control queue.
6. Resume device to continue packet processing.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/net/virtio/virtio_ethdev.c | 50 ++++++++++++++++++++++++++++++++++++--
 drivers/net/virtio/virtio_ethdev.h |  1 +
 drivers/net/virtio/virtqueue.h     | 11 +++++++++
 3 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index e8ff1e449..4f60d1367 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -19,6 +19,8 @@
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_arp.h>
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_cpuflags.h>
@@ -78,6 +80,9 @@ static int virtio_dev_queue_stats_mapping_set(
 	uint8_t stat_idx,
 	uint8_t is_rx);
 
+static void virtio_notify_peers(struct rte_eth_dev *dev);
+static void virtio_ack_link_announce(struct rte_eth_dev *dev);
+
 /*
  * The set of PCI devices this driver supports
  */
@@ -1272,9 +1277,46 @@ virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
 	return ret;
 }
 
+static void
+virtio_notify_peers(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
+	struct rte_mbuf *rarp_mbuf;
+
+	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
+			(struct ether_addr *)hw->mac_addr);
+	if (rarp_mbuf == NULL) {
+		PMD_DRV_LOG(ERR, "failed to make RARP packet.");
+		return;
+	}
+
+	/* If virtio port just stopped, no need to send RARP */
+	if (virtio_dev_pause(dev) < 0) {
+		rte_pktmbuf_free(rarp_mbuf);
+		return;
+	}
+
+	virtio_inject_pkts(dev, &rarp_mbuf, 1);
+	virtio_dev_resume(dev);
+}
+
+static void
+virtio_ack_link_announce(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = dev->data->dev_private;
+	struct virtio_pmd_ctrl ctrl;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
+
+	virtio_send_command(hw->cvq, &ctrl, NULL, 0);
+}
+
 /*
- * Process Virtio Config changed interrupt and call the callback
- * if link state changed.
+ * Process virtio config changed interrupt. Call the callback
+ * if link state changed, generate gratuitous RARP packet if
+ * the status indicates an ANNOUNCE.
  */
 void
 virtio_interrupt_handler(void *param)
@@ -1297,6 +1339,10 @@ virtio_interrupt_handler(void *param)
 						      NULL, NULL);
 	}
 
+	if (isr & VIRTIO_NET_S_ANNOUNCE) {
+		virtio_notify_peers(dev);
+		virtio_ack_link_announce(dev);
+	}
 }
 
 /* set rx and tx handlers according to what is supported */
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 69b30b7e1..09ebc5fb5 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -38,6 +38,7 @@
 	 1u << VIRTIO_NET_F_HOST_TSO6	  |	\
 	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
 	 1u << VIRTIO_NET_F_MTU	| \
+	 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |	\
 	 1u << VIRTIO_RING_F_INDIRECT_DESC |    \
 	 1ULL << VIRTIO_F_VERSION_1       |	\
 	 1ULL << VIRTIO_F_IOMMU_PLATFORM)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 1482a951d..60df359b3 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -129,6 +129,17 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 #define VIRTIO_NET_CTRL_VLAN_DEL 1
 
+/*
+ * Control link announce acknowledgement
+ *
+ * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
+ * driver has received the notification; device would clear the
+ * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
+ * this command.
+ */
+#define VIRTIO_NET_CTRL_ANNOUNCE     3
+#define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
+
 struct virtio_net_ctrl_hdr {
 	uint8_t class;
 	uint8_t cmd;
-- 
2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v11 0/5] net/virtio: support GUEST ANNOUNCE
  2018-01-16 21:40                                       ` [dpdk-dev] [PATCH v11 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
                                                           ` (4 preceding siblings ...)
  2018-01-16 21:41                                         ` [dpdk-dev] [PATCH v11 5/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2018-01-18  3:09                                         ` Yuanhan Liu
  2018-01-18  3:14                                           ` [dpdk-dev] [PATCH 1/2] net: fixup RARP generation Yuanhan Liu
  5 siblings, 1 reply; 112+ messages in thread
From: Yuanhan Liu @ 2018-01-18  3:09 UTC (permalink / raw)
  To: Xiao Wang
  Cc: olivier.matz, dev, thomas, tiwei.bie, stephen, maxime.coquelin,
	Ferruh Yigit

Xiao told me that this series (except the last patch) was already applied
to Thomas's master branch. I then realised it was my mistake.

I applied v10 last week locally for some basic testing. There is a conflict
in last patch, that's why the last patch is not merged. I forgot to do
a reset before I applied another patch. Then later, I did a push to the
next-virtio tree, thus patches from Xiao were also pushed. Ferruh then
did a pull from it. As a result, they got merged to the master branch
before I realised. Rebasing is not allowed there, thus I have made a patch
to fix my mistake.

Meanwhile, I have also spotted a build error when shared lib is enabled.
I will send them out soon.

	--yliu

On Wed, Jan 17, 2018 at 05:40:58AM +0800, Xiao Wang wrote:
> When live migration is finished, the backup VM needs to proactively announce
> its new location. DPDK vhost has implemented VHOST_USER_PROTOCOL_F_RARP to
> generate a RARP packet to switch in dequeue path. Another method is to let
> the guest proactively send out RARP packet using VIRTIO_NET_F_GUEST_ANNOUNCE
> feature.
> 
> This patch set enables this feature in virtio pmd, to support VM running virtio
> pmd be migrated without vhost supporting RARP generation.
> 
> v11:
> - Add check for parameter and tailroom in rte_net_make_rarp_packet.
> - Allocate mbuf in rte_net_make_rarp_packet.
> 
> v10:
> - Add a bold doxygen comment for the experimental function.
> 
> v9:
> - Introduce function with the experimental state.
> 
> v8:
> - Add a helper in lib/librte_net to make rarp packet, it's used by
>   both vhost and virtio.
> 
> v7:
> - Improve comment for state_lock.
> - Rename spinlock variable 'sl' to 'lock'.
> 
> v6:
> - Use rte_pktmbuf_alloc() instead of rte_mbuf_raw_alloc().
> - Remove the 'len' parameter in calling virtio_send_command().
> - Remove extra space between typo and var.
> - Improve comment and alignment.
> - Remove the unnecessary header file.
> - A better usage of 'unlikely' indication.
> 
> v5:
> - Remove txvq parameter in virtio_inject_pkts.
> - Zero hw->special_buf after using it.
> - Return the retval of tx_pkt_burst().
> - Allocate a mbuf pointer on stack directly.
> 
> v4:
> - Move spinlock lock/unlock into dev_pause/resume.
> - Separate out a patch for packet injection.
> 
> v3:
> - Remove Tx function code duplication, use a special pointer for rarp
>   injection.
> - Rename function generate_rarp to virtio_notify_peers, replace
>   'virtnet_' with 'virtio_'.
> - Add comment for state_lock.
> - Typo fix and comment improvement.
> 
> v2:
> - Use spaces instead of tabs between the code and comments.
> - Remove unnecessary parentheses.
> - Use rte_pktmbuf_mtod directly to get eth_hdr addr.
> - Fix virtio_dev_pause return value check.
> 
> Xiao Wang (5):
>   net/virtio: make control queue thread-safe
>   net/virtio: add packet injection method
>   net: add a helper for making RARP packet
>   vhost: use lib API to make RARP packet
>   net/virtio: support GUEST ANNOUNCE
> 
>  drivers/net/virtio/virtio_ethdev.c      | 113 +++++++++++++++++++++++++++++++-
>  drivers/net/virtio/virtio_ethdev.h      |   6 ++
>  drivers/net/virtio/virtio_pci.h         |   7 ++
>  drivers/net/virtio/virtio_rxtx.c        |   3 +-
>  drivers/net/virtio/virtio_rxtx.h        |   1 +
>  drivers/net/virtio/virtio_rxtx_simple.c |   2 +-
>  drivers/net/virtio/virtqueue.h          |  11 ++++
>  lib/Makefile                            |   3 +-
>  lib/librte_net/Makefile                 |   1 +
>  lib/librte_net/rte_arp.c                |  50 ++++++++++++++
>  lib/librte_net/rte_arp.h                |  18 +++++
>  lib/librte_net/rte_net_version.map      |   6 ++
>  lib/librte_vhost/Makefile               |   2 +-
>  lib/librte_vhost/virtio_net.c           |  51 +-------------
>  14 files changed, 219 insertions(+), 55 deletions(-)
>  create mode 100644 lib/librte_net/rte_arp.c
> 
> -- 
> 2.15.1

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH 1/2] net: fixup RARP generation
  2018-01-18  3:09                                         ` [dpdk-dev] [PATCH v11 0/5] " Yuanhan Liu
@ 2018-01-18  3:14                                           ` Yuanhan Liu
  2018-01-18  3:14                                             ` [dpdk-dev] [PATCH 2/2] net: fix build error Yuanhan Liu
                                                               ` (2 more replies)
  0 siblings, 3 replies; 112+ messages in thread
From: Yuanhan Liu @ 2018-01-18  3:14 UTC (permalink / raw)
  To: dev; +Cc: Thomas Monjalon, Xiao Wang, Ferruh Yigit, Olivier Matz, Yuanhan Liu

Due to a mistake on my part, an older version (v10) was merged to the
master branch. It is v11 that should have been applied. However, the master
branch cannot be rebased. Thus, this patch is made from the diff between v10
and v11.

Code is from Xiao Wang.

Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>
---
 lib/librte_net/rte_arp.c      | 26 +++++++++++++++++---------
 lib/librte_net/rte_arp.h      | 11 ++++++-----
 lib/librte_vhost/virtio_net.c | 12 +++---------
 3 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/lib/librte_net/rte_arp.c b/lib/librte_net/rte_arp.c
index d7223b0..b953bcd 100644
--- a/lib/librte_net/rte_arp.c
+++ b/lib/librte_net/rte_arp.c
@@ -7,17 +7,28 @@
 #include <rte_arp.h>
 
 #define RARP_PKT_SIZE	64
-int
-rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac)
+struct rte_mbuf *
+rte_net_make_rarp_packet(struct rte_mempool *mpool,
+		const struct ether_addr *mac)
 {
 	struct ether_hdr *eth_hdr;
 	struct arp_hdr *rarp;
+	struct rte_mbuf *mbuf;
 
-	if (mbuf->buf_len < RARP_PKT_SIZE)
-		return -1;
+	if (mpool == NULL)
+		return NULL;
+
+	mbuf = rte_pktmbuf_alloc(mpool);
+	if (mbuf == NULL)
+		return NULL;
+
+	eth_hdr = (struct ether_hdr *)rte_pktmbuf_append(mbuf, RARP_PKT_SIZE);
+	if (eth_hdr == NULL) {
+		rte_pktmbuf_free(mbuf);
+		return NULL;
+	}
 
 	/* Ethernet header. */
-	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
 	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
 	ether_addr_copy(mac, &eth_hdr->s_addr);
 	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
@@ -35,8 +46,5 @@ rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac)
 	memset(&rarp->arp_data.arp_sip, 0x00, 4);
 	memset(&rarp->arp_data.arp_tip, 0x00, 4);
 
-	mbuf->data_len = RARP_PKT_SIZE;
-	mbuf->pkt_len = RARP_PKT_SIZE;
-
-	return 0;
+	return mbuf;
 }
diff --git a/lib/librte_net/rte_arp.h b/lib/librte_net/rte_arp.h
index dad7423..457a39b 100644
--- a/lib/librte_net/rte_arp.h
+++ b/lib/librte_net/rte_arp.h
@@ -82,16 +82,17 @@ struct arp_hdr {
  *
  * Make a RARP packet based on MAC addr.
  *
- * @param mbuf
- *   Pointer to the rte_mbuf structure
+ * @param mpool
+ *   Pointer to the rte_mempool
  * @param mac
  *   Pointer to the MAC addr
  *
  * @return
- *   - 0 on success, negative on error
+ *   - RARP packet pointer on success, or NULL on error
  */
-int
-rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac);
+struct rte_mbuf *
+rte_net_make_rarp_packet(struct rte_mempool *mpool,
+		const struct ether_addr *mac);
 
 #ifdef __cplusplus
 }
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ca89288..a1d8026 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1162,19 +1162,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			rte_atomic16_cmpset((volatile uint16_t *)
 				&dev->broadcast_rarp.cnt, 1, 0))) {
 
-		rarp_mbuf = rte_pktmbuf_alloc(mbuf_pool);
+		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
 		if (rarp_mbuf == NULL) {
 			RTE_LOG(ERR, VHOST_DATA,
-				"Failed to allocate memory for mbuf.\n");
+				"Failed to make RARP packet.\n");
 			return 0;
 		}
-
-		if (rte_net_make_rarp_packet(rarp_mbuf, &dev->mac) < 0) {
-			rte_pktmbuf_free(rarp_mbuf);
-			rarp_mbuf = NULL;
-		} else {
-			count -= 1;
-		}
+		count -= 1;
 	}
 
 	free_entries = *((volatile uint16_t *)&vq->avail->idx) -
-- 
2.7.4

^ permalink raw reply	[flat|nested] 112+ messages in thread

* [dpdk-dev] [PATCH 2/2] net: fix build error
  2018-01-18  3:14                                           ` [dpdk-dev] [PATCH 1/2] net: fixup RARP generation Yuanhan Liu
@ 2018-01-18  3:14                                             ` Yuanhan Liu
  2018-01-18  7:38                                               ` Thomas Monjalon
  2018-01-18  8:38                                             ` [dpdk-dev] [PATCH 1/2] net: fixup RARP generation Thomas Monjalon
  2018-01-19 16:04                                             ` Ferruh Yigit
  2 siblings, 1 reply; 112+ messages in thread
From: Yuanhan Liu @ 2018-01-18  3:14 UTC (permalink / raw)
  To: dev; +Cc: Thomas Monjalon, Xiao Wang, Ferruh Yigit, Olivier Matz, Yuanhan Liu

Fix build error when shared lib is enabled:

  LD librte_net.so.1.1
rte_arp.o: In function `rte_net_make_rarp_packet':
rte_arp.c:(.text+0x1f0): undefined reference to `rte_mempool_ops_table'
rte_arp.c:(.text+0x21d): undefined reference to `rte_mempool_ops_table'
rte_arp.c:(.text+0x2d5): undefined reference to `rte_mempool_ops_table'
rte_arp.c:(.text+0x384): undefined reference to `rte_mempool_ops_table'
rte_arp.c:(.text+0x4b7): undefined reference to `rte_mempool_ops_table'

Fixes: 45ae05df824c ("net: add a helper for making RARP packet")

Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>
---
 lib/librte_net/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index ab290c3..95ff549 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -6,7 +6,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_net.a
 
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
-LDLIBS += -lrte_mbuf -lrte_eal
+LDLIBS += -lrte_mbuf -lrte_eal -lrte_mempool
 
 EXPORT_MAP := rte_net_version.map
 LIBABIVER := 1
-- 
2.7.4

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net: fix build error
  2018-01-18  3:14                                             ` [dpdk-dev] [PATCH 2/2] net: fix build error Yuanhan Liu
@ 2018-01-18  7:38                                               ` Thomas Monjalon
  2018-01-18  7:45                                                 ` Wang, Xiao W
  0 siblings, 1 reply; 112+ messages in thread
From: Thomas Monjalon @ 2018-01-18  7:38 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, Xiao Wang, Ferruh Yigit, Olivier Matz

18/01/2018 04:14, Yuanhan Liu:
> Fix build error when shared lib is enabled:
> 
>   LD librte_net.so.1.1
> rte_arp.o: In function `rte_net_make_rarp_packet':
> rte_arp.c:(.text+0x1f0): undefined reference to `rte_mempool_ops_table'
> rte_arp.c:(.text+0x21d): undefined reference to `rte_mempool_ops_table'
> rte_arp.c:(.text+0x2d5): undefined reference to `rte_mempool_ops_table'
> rte_arp.c:(.text+0x384): undefined reference to `rte_mempool_ops_table'
> rte_arp.c:(.text+0x4b7): undefined reference to `rte_mempool_ops_table'

This is very strange, I do not see this error on my machine.

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net: fix build error
  2018-01-18  7:38                                               ` Thomas Monjalon
@ 2018-01-18  7:45                                                 ` Wang, Xiao W
  2018-01-18  8:03                                                   ` Yuanhan Liu
  0 siblings, 1 reply; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-18  7:45 UTC (permalink / raw)
  To: Thomas Monjalon, Yuanhan Liu; +Cc: dev, Yigit, Ferruh, Olivier Matz



> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Thursday, January 18, 2018 3:39 PM
> To: Yuanhan Liu <yliu@fridaylinux.org>
> Cc: dev@dpdk.org; Wang, Xiao W <xiao.w.wang@intel.com>; Yigit, Ferruh
> <ferruh.yigit@intel.com>; Olivier Matz <olivier.matz@6wind.com>
> Subject: Re: [PATCH 2/2] net: fix build error
> 
> 18/01/2018 04:14, Yuanhan Liu:
> > Fix build error when shared lib is enabled:
> >
> >   LD librte_net.so.1.1
> > rte_arp.o: In function `rte_net_make_rarp_packet':
> > rte_arp.c:(.text+0x1f0): undefined reference to `rte_mempool_ops_table'
> > rte_arp.c:(.text+0x21d): undefined reference to `rte_mempool_ops_table'
> > rte_arp.c:(.text+0x2d5): undefined reference to `rte_mempool_ops_table'
> > rte_arp.c:(.text+0x384): undefined reference to `rte_mempool_ops_table'
> > rte_arp.c:(.text+0x4b7): undefined reference to `rte_mempool_ops_table'
> 
> This is very strange, I do not see this error on my machine.

I could see this error on mine with:
+CONFIG_RTE_BUILD_SHARED_LIB=y

And this fix helps.

Best Regards,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net: fix build error
  2018-01-18  7:45                                                 ` Wang, Xiao W
@ 2018-01-18  8:03                                                   ` Yuanhan Liu
  2018-01-18  8:36                                                     ` Thomas Monjalon
  0 siblings, 1 reply; 112+ messages in thread
From: Yuanhan Liu @ 2018-01-18  8:03 UTC (permalink / raw)
  To: Wang, Xiao W; +Cc: Thomas Monjalon, dev, Yigit, Ferruh, Olivier Matz

On Thu, Jan 18, 2018 at 07:45:23AM +0000, Wang, Xiao W wrote:
> 
> 
> > -----Original Message-----
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > Sent: Thursday, January 18, 2018 3:39 PM
> > To: Yuanhan Liu <yliu@fridaylinux.org>
> > Cc: dev@dpdk.org; Wang, Xiao W <xiao.w.wang@intel.com>; Yigit, Ferruh
> > <ferruh.yigit@intel.com>; Olivier Matz <olivier.matz@6wind.com>
> > Subject: Re: [PATCH 2/2] net: fix build error
> > 
> > 18/01/2018 04:14, Yuanhan Liu:
> > > Fix build error when shared lib is enabled:
> > >
> > >   LD librte_net.so.1.1
> > > rte_arp.o: In function `rte_net_make_rarp_packet':
> > > rte_arp.c:(.text+0x1f0): undefined reference to `rte_mempool_ops_table'
> > > rte_arp.c:(.text+0x21d): undefined reference to `rte_mempool_ops_table'
> > > rte_arp.c:(.text+0x2d5): undefined reference to `rte_mempool_ops_table'
> > > rte_arp.c:(.text+0x384): undefined reference to `rte_mempool_ops_table'
> > > rte_arp.c:(.text+0x4b7): undefined reference to `rte_mempool_ops_table'
> > 
> > This is very strange, I do not see this error on my machine.
> 
> I could see this error on mine with:
> +CONFIG_RTE_BUILD_SHARED_LIB=y

Yes, that's what was meant in the commit log by "when shared lib is enabled".

	--yliu

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net: fix build error
  2018-01-18  8:03                                                   ` Yuanhan Liu
@ 2018-01-18  8:36                                                     ` Thomas Monjalon
  2018-01-18  8:48                                                       ` Yuanhan Liu
  0 siblings, 1 reply; 112+ messages in thread
From: Thomas Monjalon @ 2018-01-18  8:36 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, Wang, Xiao W, Yigit, Ferruh, Olivier Matz

18/01/2018 09:03, Yuanhan Liu:
> On Thu, Jan 18, 2018 at 07:45:23AM +0000, Wang, Xiao W wrote:
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > 18/01/2018 04:14, Yuanhan Liu:
> > > > Fix build error when shared lib is enabled:
> > > >
> > > >   LD librte_net.so.1.1
> > > > rte_arp.o: In function `rte_net_make_rarp_packet':
> > > > rte_arp.c:(.text+0x1f0): undefined reference to `rte_mempool_ops_table'
> > > > rte_arp.c:(.text+0x21d): undefined reference to `rte_mempool_ops_table'
> > > > rte_arp.c:(.text+0x2d5): undefined reference to `rte_mempool_ops_table'
> > > > rte_arp.c:(.text+0x384): undefined reference to `rte_mempool_ops_table'
> > > > rte_arp.c:(.text+0x4b7): undefined reference to `rte_mempool_ops_table'
> > > 
> > > This is very strange, I do not see this error on my machine.
> > 
> > I could see this error on mine with:
> > +CONFIG_RTE_BUILD_SHARED_LIB=y
> 
> Yes, that's what meant in the commit log by "when shared lib is enabled".

Got it: you are fixing a build issue introduced by the patch 1
in this series.
So please merge them.

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net: fixup RARP generation
  2018-01-18  3:14                                           ` [dpdk-dev] [PATCH 1/2] net: fixup RARP generation Yuanhan Liu
  2018-01-18  3:14                                             ` [dpdk-dev] [PATCH 2/2] net: fix build error Yuanhan Liu
@ 2018-01-18  8:38                                             ` Thomas Monjalon
  2018-01-18  8:51                                               ` Yuanhan Liu
  2018-01-19 16:04                                             ` Ferruh Yigit
  2 siblings, 1 reply; 112+ messages in thread
From: Thomas Monjalon @ 2018-01-18  8:38 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, Xiao Wang, Ferruh Yigit, Olivier Matz

18/01/2018 04:14, Yuanhan Liu:
> Due to a mistake operation from me, older version (v10) was merged to
> master branch. It's the v11 should be applied. However, the master branch
> is not rebase-able. Thus, this patch is made, from the diff between v10
> and v11.

Understood it is a mistake.
However, you could briefly describe what this change does.
Is there a changelog in v11 patch?
> 
> Code is from Xiao Wang.

You may add his Signed-off.

> Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net: fix build error
  2018-01-18  8:36                                                     ` Thomas Monjalon
@ 2018-01-18  8:48                                                       ` Yuanhan Liu
  0 siblings, 0 replies; 112+ messages in thread
From: Yuanhan Liu @ 2018-01-18  8:48 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev, Wang, Xiao W, Yigit, Ferruh, Olivier Matz

On Thu, Jan 18, 2018 at 09:36:46AM +0100, Thomas Monjalon wrote:
> 18/01/2018 09:03, Yuanhan Liu:
> > On Thu, Jan 18, 2018 at 07:45:23AM +0000, Wang, Xiao W wrote:
> > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > 18/01/2018 04:14, Yuanhan Liu:
> > > > > Fix build error when shared lib is enabled:
> > > > >
> > > > >   LD librte_net.so.1.1
> > > > > rte_arp.o: In function `rte_net_make_rarp_packet':
> > > > > rte_arp.c:(.text+0x1f0): undefined reference to `rte_mempool_ops_table'
> > > > > rte_arp.c:(.text+0x21d): undefined reference to `rte_mempool_ops_table'
> > > > > rte_arp.c:(.text+0x2d5): undefined reference to `rte_mempool_ops_table'
> > > > > rte_arp.c:(.text+0x384): undefined reference to `rte_mempool_ops_table'
> > > > > rte_arp.c:(.text+0x4b7): undefined reference to `rte_mempool_ops_table'
> > > > 
> > > > This is very strange, I do not see this error on my machine.
> > > 
> > > I could see this error on mine with:
> > > +CONFIG_RTE_BUILD_SHARED_LIB=y
> > 
> > Yes, that's what meant in the commit log by "when shared lib is enabled".
> 
> Got it: you are fixing a build issue introduced by the patch 1
> in this series.
> So please merge them.

Right. I will merge them.

	--yliu

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net: fixup RARP generation
  2018-01-18  8:38                                             ` [dpdk-dev] [PATCH 1/2] net: fixup RARP generation Thomas Monjalon
@ 2018-01-18  8:51                                               ` Yuanhan Liu
  2018-01-18  8:53                                                 ` Wang, Xiao W
  0 siblings, 1 reply; 112+ messages in thread
From: Yuanhan Liu @ 2018-01-18  8:51 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev, Xiao Wang, Ferruh Yigit, Olivier Matz

On Thu, Jan 18, 2018 at 09:38:39AM +0100, Thomas Monjalon wrote:
> 18/01/2018 04:14, Yuanhan Liu:
> > Due to a mistake operation from me, older version (v10) was merged to
> > master branch. It's the v11 should be applied. However, the master branch
> > is not rebase-able. Thus, this patch is made, from the diff between v10
> > and v11.
> 
> Understood it is a mistake.
> However, you can briefly describes what does this change.
> Is there a changelog in v11 patch?

Yes, there is:

v11:
- Add check for parameter and tailroom in rte_net_make_rarp_packet.
- Allocate mbuf in rte_net_make_rarp_packet.

> > 
> > Code is from Xiao Wang.
> 
> You may add his Signed-off.

I have no objection. Xiao, is that okay with you? I will also set the author
to you.

	--yliu

> > Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net: fixup RARP generation
  2018-01-18  8:51                                               ` Yuanhan Liu
@ 2018-01-18  8:53                                                 ` Wang, Xiao W
  0 siblings, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-18  8:53 UTC (permalink / raw)
  To: Yuanhan Liu, Thomas Monjalon; +Cc: dev, Yigit, Ferruh, Olivier Matz



> -----Original Message-----
> From: Yuanhan Liu [mailto:yliu@fridaylinux.org]
> Sent: Thursday, January 18, 2018 4:51 PM
> To: Thomas Monjalon <thomas@monjalon.net>
> Cc: dev@dpdk.org; Wang, Xiao W <xiao.w.wang@intel.com>; Yigit, Ferruh
> <ferruh.yigit@intel.com>; Olivier Matz <olivier.matz@6wind.com>
> Subject: Re: [dpdk-dev] [PATCH 1/2] net: fixup RARP generation
> 
> On Thu, Jan 18, 2018 at 09:38:39AM +0100, Thomas Monjalon wrote:
> > 18/01/2018 04:14, Yuanhan Liu:
> > > Due to a mistake operation from me, older version (v10) was merged to
> > > master branch. It's the v11 should be applied. However, the master branch
> > > is not rebase-able. Thus, this patch is made, from the diff between v10
> > > and v11.
> >
> > Understood it is a mistake.
> > However, you can briefly describes what does this change.
> > Is there a changelog in v11 patch?
> 
> Yes, ther is:
> 
> v11:
> - Add check for parameter and tailroom in rte_net_make_rarp_packet.
> - Allocate mbuf in rte_net_make_rarp_packet.
> 
> > >
> > > Code is from Xiao Wang.
> >
> > You may add his Signed-off.
> 
> I have no objection. Xiao, okay to you? I will also set the author
> to you.
> 
> 	--yliu
> 
> > > Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>

OK for me.

BRs,
Xiao

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net: fixup RARP generation
  2018-01-18  3:14                                           ` [dpdk-dev] [PATCH 1/2] net: fixup RARP generation Yuanhan Liu
  2018-01-18  3:14                                             ` [dpdk-dev] [PATCH 2/2] net: fix build error Yuanhan Liu
  2018-01-18  8:38                                             ` [dpdk-dev] [PATCH 1/2] net: fixup RARP generation Thomas Monjalon
@ 2018-01-19 16:04                                             ` Ferruh Yigit
  2 siblings, 0 replies; 112+ messages in thread
From: Ferruh Yigit @ 2018-01-19 16:04 UTC (permalink / raw)
  To: Yuanhan Liu, dev; +Cc: Thomas Monjalon, Xiao Wang, Olivier Matz

On 1/18/2018 3:14 AM, Yuanhan Liu wrote:
> Due to a mistake operation from me, older version (v10) was merged to
> master branch. It's the v11 should be applied. However, the master branch
> is not rebase-able. Thus, this patch is made, from the diff between v10
> and v11.
> 
> Code is from Xiao Wang.

    Fixes: 45ae05df824c ("net: add a helper for making RARP packet")
    Fixes: c3ffdba0e88a ("vhost: use API to make RARP packet")

Please correct me if these are wrong.

> 
> Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>
> ---
>  lib/librte_net/rte_arp.c      | 26 +++++++++++++++++---------
>  lib/librte_net/rte_arp.h      | 11 ++++++-----
>  lib/librte_vhost/virtio_net.c | 12 +++---------
>  3 files changed, 26 insertions(+), 23 deletions(-)
> 
> diff --git a/lib/librte_net/rte_arp.c b/lib/librte_net/rte_arp.c
> index d7223b0..b953bcd 100644
> --- a/lib/librte_net/rte_arp.c
> +++ b/lib/librte_net/rte_arp.c
> @@ -7,17 +7,28 @@
>  #include <rte_arp.h>
>  
>  #define RARP_PKT_SIZE	64
> -int
> -rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac)
> +struct rte_mbuf *
> +rte_net_make_rarp_packet(struct rte_mempool *mpool,
> +		const struct ether_addr *mac)
>  {
>  	struct ether_hdr *eth_hdr;
>  	struct arp_hdr *rarp;
> +	struct rte_mbuf *mbuf;
>  
> -	if (mbuf->buf_len < RARP_PKT_SIZE)
> -		return -1;
> +	if (mpool == NULL)
> +		return NULL;
> +
> +	mbuf = rte_pktmbuf_alloc(mpool);
> +	if (mbuf == NULL)
> +		return NULL;
> +
> +	eth_hdr = (struct ether_hdr *)rte_pktmbuf_append(mbuf, RARP_PKT_SIZE);
> +	if (eth_hdr == NULL) {
> +		rte_pktmbuf_free(mbuf);
> +		return NULL;
> +	}
>  
>  	/* Ethernet header. */
> -	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
>  	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
>  	ether_addr_copy(mac, &eth_hdr->s_addr);
>  	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
> @@ -35,8 +46,5 @@ rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac)
>  	memset(&rarp->arp_data.arp_sip, 0x00, 4);
>  	memset(&rarp->arp_data.arp_tip, 0x00, 4);
>  
> -	mbuf->data_len = RARP_PKT_SIZE;
> -	mbuf->pkt_len = RARP_PKT_SIZE;
> -
> -	return 0;
> +	return mbuf;
>  }
> diff --git a/lib/librte_net/rte_arp.h b/lib/librte_net/rte_arp.h
> index dad7423..457a39b 100644
> --- a/lib/librte_net/rte_arp.h
> +++ b/lib/librte_net/rte_arp.h
> @@ -82,16 +82,17 @@ struct arp_hdr {
>   *
>   * Make a RARP packet based on MAC addr.
>   *
> - * @param mbuf
> - *   Pointer to the rte_mbuf structure
> + * @param mpool
> + *   Pointer to the rte_mempool
>   * @param mac
>   *   Pointer to the MAC addr
>   *
>   * @return
> - *   - 0 on success, negative on error
> + *   - RARP packet pointer on success, or NULL on error
>   */
> -int
> -rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac);
> +struct rte_mbuf *
> +rte_net_make_rarp_packet(struct rte_mempool *mpool,
> +		const struct ether_addr *mac);
>  
>  #ifdef __cplusplus
>  }
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index ca89288..a1d8026 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -1162,19 +1162,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>  			rte_atomic16_cmpset((volatile uint16_t *)
>  				&dev->broadcast_rarp.cnt, 1, 0))) {
>  
> -		rarp_mbuf = rte_pktmbuf_alloc(mbuf_pool);
> +		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
>  		if (rarp_mbuf == NULL) {
>  			RTE_LOG(ERR, VHOST_DATA,
> -				"Failed to allocate memory for mbuf.\n");
> +				"Failed to make RARP packet.\n");
>  			return 0;
>  		}
> -
> -		if (rte_net_make_rarp_packet(rarp_mbuf, &dev->mac) < 0) {
> -			rte_pktmbuf_free(rarp_mbuf);
> -			rarp_mbuf = NULL;
> -		} else {
> -			count -= 1;
> -		}
> +		count -= 1;
>  	}
>  
>  	free_entries = *((volatile uint16_t *)&vq->avail->idx) -
> 

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v11 5/5] net/virtio: support GUEST ANNOUNCE
  2018-01-16 21:41                                         ` [dpdk-dev] [PATCH v11 5/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
@ 2018-01-19 17:33                                           ` Ferruh Yigit
  2018-01-20 14:31                                             ` Ferruh Yigit
  0 siblings, 1 reply; 112+ messages in thread
From: Ferruh Yigit @ 2018-01-19 17:33 UTC (permalink / raw)
  To: Xiao Wang, yliu, olivier.matz, maxime.coquelin, Thomas Monjalon
  Cc: dev, thomas, tiwei.bie, stephen

On 1/16/2018 9:41 PM, Xiao Wang wrote:
> When live migration is done, for the backup VM, either the virtio
> frontend or the vhost backend needs to send out gratuitous RARP packet
> to announce its new network location.
> 
> This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
> migration scenario where the vhost backend doesn't have the ability to
> generate RARP packet.
> 
> Brief introduction of the work flow:
> 1. QEMU finishes live migration, pokes the backup VM with an interrupt.
> 2. Virtio interrupt handler reads out the interrupt status value, and
>    realizes it needs to send out RARP packet to announce its location.
> 3. Pause device to stop worker thread touching the queues.
> 4. Inject a RARP packet into a Tx Queue.
> 5. Ack the interrupt via control queue.
> 6. Resume device to continue packet processing.
> 
> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>


Hi Yuanhan,

This commit breaks the build!

As far as I understand, you sent a fix but it was merged into another patch, which
leaves this commit still broken.

What do you think about sending a fix that can be merged into this one, so I can
squash it on next-net?

Thanks,
ferruh

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v11 5/5] net/virtio: support GUEST ANNOUNCE
  2018-01-19 17:33                                           ` Ferruh Yigit
@ 2018-01-20 14:31                                             ` Ferruh Yigit
  2018-01-21  1:31                                               ` Wang, Xiao W
  0 siblings, 1 reply; 112+ messages in thread
From: Ferruh Yigit @ 2018-01-20 14:31 UTC (permalink / raw)
  To: Xiao Wang, yliu, olivier.matz, maxime.coquelin, Thomas Monjalon
  Cc: dev, tiwei.bie, stephen

On 1/19/2018 5:33 PM, Ferruh Yigit wrote:
> On 1/16/2018 9:41 PM, Xiao Wang wrote:
>> When live migration is done, for the backup VM, either the virtio
>> frontend or the vhost backend needs to send out gratuitous RARP packet
>> to announce its new network location.
>>
>> This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support live
>> migration scenario where the vhost backend doesn't have the ability to
>> generate RARP packet.
>>
>> Brief introduction of the work flow:
>> 1. QEMU finishes live migration, pokes the backup VM with an interrupt.
>> 2. Virtio interrupt handler reads out the interrupt status value, and
>>    realizes it needs to send out RARP packet to announce its location.
>> 3. Pause device to stop worker thread touching the queues.
>> 4. Inject a RARP packet into a Tx Queue.
>> 5. Ack the interrupt via control queue.
>> 6. Resume device to continue packet processing.
>>
>> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
>> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> 
> 
> Hi Yuanhan,
> 
> This commit breaks the build!

I switched the two patches and the problem is gone, like:
first: net: fixup RARP generation
second: net/virtio: support GUEST ANNOUNCE

From my point of view nothing more needs to be done, but can you please double
check the patches.

Thanks,
ferruh

> 
> As far as I understand you send a fix but merged into other patch, which leaves
> this commit still broken.
> 
> What do you think sending a fix that can be mergable to this one, so I can
> squash it on next-net?
> 
> Thanks,
> ferruh
> 

^ permalink raw reply	[flat|nested] 112+ messages in thread

* Re: [dpdk-dev] [PATCH v11 5/5] net/virtio: support GUEST ANNOUNCE
  2018-01-20 14:31                                             ` Ferruh Yigit
@ 2018-01-21  1:31                                               ` Wang, Xiao W
  0 siblings, 0 replies; 112+ messages in thread
From: Wang, Xiao W @ 2018-01-21  1:31 UTC (permalink / raw)
  To: Yigit, Ferruh, yliu, olivier.matz, maxime.coquelin, Thomas Monjalon
  Cc: dev, Bie, Tiwei, stephen



> -----Original Message-----
> From: Yigit, Ferruh
> Sent: Saturday, January 20, 2018 10:31 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>; yliu@fridaylinux.org;
> olivier.matz@6wind.com; maxime.coquelin@redhat.com; Thomas Monjalon
> <thomas@monjalon.net>
> Cc: dev@dpdk.org; Bie, Tiwei <tiwei.bie@intel.com>;
> stephen@networkplumber.org
> Subject: Re: [dpdk-dev] [PATCH v11 5/5] net/virtio: support GUEST ANNOUNCE
> 
> On 1/19/2018 5:33 PM, Ferruh Yigit wrote:
> > On 1/16/2018 9:41 PM, Xiao Wang wrote:
> >> When live migration is done, for the backup VM, either the virtio
> >> frontend or the vhost backend needs to send out gratuitous RARP packet
> >> to announce its new network location.
> >>
> >> This patch enables VIRTIO_NET_F_GUEST_ANNOUNCE feature to support
> live
> >> migration scenario where the vhost backend doesn't have the ability to
> >> generate RARP packet.
> >>
> >> Brief introduction of the work flow:
> >> 1. QEMU finishes live migration, pokes the backup VM with an interrupt.
> >> 2. Virtio interrupt handler reads out the interrupt status value, and
> >>    realizes it needs to send out RARP packet to announce its location.
> >> 3. Pause device to stop worker thread touching the queues.
> >> 4. Inject a RARP packet into a Tx Queue.
> >> 5. Ack the interrupt via control queue.
> >> 6. Resume device to continue packet processing.
> >>
> >> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> >> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> >
> >
> > Hi Yuanhan,
> >
> > This commit breaks the build!
> 
> I switched two patches and problem gone, like:
> first: net: fixup RARP generation
> second: net/virtio: support GUEST ANNOUNCE
> 
> From my point of view nothing more needs to be done, but can you please
> double
> check the patches.

The 2 patches are OK.
Thanks!

BRs,
Xiao
> 
> Thanks,
> ferruh
> 
> >
> > As far as I understand you send a fix but merged into other patch, which
> leaves
> > this commit still broken.
> >
> > What do you think sending a fix that can be mergable to this one, so I can
> > squash it on next-net?
> >
> > Thanks,
> > ferruh
> >


^ permalink raw reply	[flat|nested] 112+ messages in thread

end of thread, other threads:[~2018-01-21  1:31 UTC | newest]

Thread overview: 112+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-24 11:03 [dpdk-dev] [PATCH 0/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
2017-11-24 11:03 ` [dpdk-dev] [PATCH 1/2] net/virtio: make control queue thread-safe Xiao Wang
2017-11-24  5:38   ` Tiwei Bie
2017-11-30  2:10     ` Wang, Xiao W
2017-11-30  2:59   ` Stephen Hemminger
2017-12-01  1:38     ` Wang, Xiao W
2017-12-04 14:02   ` [dpdk-dev] [PATCH v2 0/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
2017-12-04 14:02     ` [dpdk-dev] [PATCH v2 1/2] net/virtio: make control queue thread-safe Xiao Wang
2017-12-04 14:02     ` [dpdk-dev] [PATCH v2 2/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
2017-12-04  8:46       ` Tiwei Bie
2018-01-03  1:37         ` Wang, Xiao W
2018-01-03  8:42         ` Wang, Xiao W
2017-12-06 11:23       ` Tiwei Bie
2017-12-06 14:22         ` Yuanhan Liu
2018-01-03  1:41         ` Wang, Xiao W
2018-01-04  7:41       ` [dpdk-dev] [PATCH v3 0/2] " Xiao Wang
2018-01-04  7:41         ` [dpdk-dev] [PATCH v3 1/2] net/virtio: make control queue thread-safe Xiao Wang
2018-01-04  7:41         ` [dpdk-dev] [PATCH v3 2/2] net/virtio: support GUEST ANNOUNCE Xiao Wang
2018-01-04  2:51           ` Tiwei Bie
2018-01-04  7:11             ` Wang, Xiao W
2018-01-04 15:59           ` [dpdk-dev] [PATCH v4 0/3] " Xiao Wang
2018-01-04 15:59             ` [dpdk-dev] [PATCH v4 1/3] net/virtio: make control queue thread-safe Xiao Wang
2018-01-04 15:59             ` [dpdk-dev] [PATCH v4 2/3] net/virtio: add packet injection method Xiao Wang
2018-01-04  7:56               ` Tiwei Bie
2018-01-05 16:46               ` [dpdk-dev] [PATCH v5 0/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
2018-01-05 16:46                 ` [dpdk-dev] [PATCH v5 1/3] net/virtio: make control queue thread-safe Xiao Wang
2018-01-05 16:46                 ` [dpdk-dev] [PATCH v5 2/3] net/virtio: add packet injection method Xiao Wang
2018-01-05 18:00                   ` Tiwei Bie
2018-01-07  2:37                     ` Wang, Xiao W
2018-01-05 16:46                 ` [dpdk-dev] [PATCH v5 3/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
2018-01-05 17:56                   ` Tiwei Bie
2018-01-07  2:29                     ` Wang, Xiao W
2018-01-07 12:05                   ` [dpdk-dev] [PATCH v6 0/3] " Xiao Wang
2018-01-07 12:05                     ` [dpdk-dev] [PATCH v6 1/3] net/virtio: make control queue thread-safe Xiao Wang
2018-01-08 13:06                       ` Yuanhan Liu
2018-01-08 15:25                         ` Wang, Xiao W
2018-01-07 12:05                     ` [dpdk-dev] [PATCH v6 2/3] net/virtio: add packet injection method Xiao Wang
2018-01-08 13:03                       ` Yuanhan Liu
2018-01-08 15:11                         ` Wang, Xiao W
2018-01-09  2:55                           ` Wang, Xiao W
2018-01-09 14:26                       ` [dpdk-dev] [PATCH v7 0/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
2018-01-09 14:26                         ` [dpdk-dev] [PATCH v7 1/3] net/virtio: make control queue thread-safe Xiao Wang
2018-01-09 14:26                         ` [dpdk-dev] [PATCH v7 2/3] net/virtio: add packet injection method Xiao Wang
2018-01-09 14:26                         ` [dpdk-dev] [PATCH v7 3/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
2018-01-09  8:49                           ` Maxime Coquelin
2018-01-09 10:58                             ` Wang, Xiao W
2018-01-09 11:03                             ` Wang, Xiao W
2018-01-09 11:41                               ` Thomas Monjalon
2018-01-09 13:36                                 ` Yuanhan Liu
2018-01-09 13:26                           ` [dpdk-dev] [PATCH v8 0/5] " Xiao Wang
2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 1/5] net/virtio: make control queue thread-safe Xiao Wang
2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 2/5] net/virtio: add packet injection method Xiao Wang
2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 3/5] net: add a helper for making RARP packet Xiao Wang
2018-01-09 13:48                               ` Thomas Monjalon
2018-01-09 15:52                                 ` Wang, Xiao W
2018-01-09 16:09                               ` [dpdk-dev] [PATCH v9 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 1/5] net/virtio: make control queue thread-safe Xiao Wang
2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 2/5] net/virtio: add packet injection method Xiao Wang
2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 3/5] net: add a helper for making RARP packet Xiao Wang
2018-01-09 17:22                                   ` Thomas Monjalon
2018-01-10  1:23                                   ` [dpdk-dev] [PATCH v10 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 1/5] net/virtio: make control queue thread-safe Xiao Wang
2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 2/5] net/virtio: add packet injection method Xiao Wang
2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP packet Xiao Wang
2018-01-10 13:06                                       ` Yuanhan Liu
2018-01-10 14:10                                         ` Thomas Monjalon
2018-01-16  9:01                                       ` Olivier Matz
2018-01-16  9:43                                         ` Wang, Xiao W
2018-01-16 10:42                                           ` Olivier Matz
2018-01-16 11:03                                             ` Wang, Xiao W
2018-01-16 11:42                                             ` Wang, Xiao W
2018-01-16 21:40                                       ` [dpdk-dev] [PATCH v11 0/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
2018-01-16 21:40                                         ` [dpdk-dev] [PATCH v11 1/5] net/virtio: make control queue thread-safe Xiao Wang
2018-01-16 21:41                                         ` [dpdk-dev] [PATCH v11 2/5] net/virtio: add packet injection method Xiao Wang
2018-01-16 21:41                                         ` [dpdk-dev] [PATCH v11 3/5] net: add a helper for making RARP packet Xiao Wang
2018-01-16 14:29                                           ` Olivier Matz
2018-01-16 21:41                                         ` [dpdk-dev] [PATCH v11 4/5] vhost: use lib API to make " Xiao Wang
2018-01-16 21:41                                         ` [dpdk-dev] [PATCH v11 5/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
2018-01-19 17:33                                           ` Ferruh Yigit
2018-01-20 14:31                                             ` Ferruh Yigit
2018-01-21  1:31                                               ` Wang, Xiao W
2018-01-18  3:09                                         ` [dpdk-dev] [PATCH v11 0/5] " Yuanhan Liu
2018-01-18  3:14                                           ` [dpdk-dev] [PATCH 1/2] net: fixup RARP generation Yuanhan Liu
2018-01-18  3:14                                             ` [dpdk-dev] [PATCH 2/2] net: fix build error Yuanhan Liu
2018-01-18  7:38                                               ` Thomas Monjalon
2018-01-18  7:45                                                 ` Wang, Xiao W
2018-01-18  8:03                                                   ` Yuanhan Liu
2018-01-18  8:36                                                     ` Thomas Monjalon
2018-01-18  8:48                                                       ` Yuanhan Liu
2018-01-18  8:38                                             ` [dpdk-dev] [PATCH 1/2] net: fixup RARP generation Thomas Monjalon
2018-01-18  8:51                                               ` Yuanhan Liu
2018-01-18  8:53                                                 ` Wang, Xiao W
2018-01-19 16:04                                             ` Ferruh Yigit
2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 4/5] vhost: use lib API to make RARP packet Xiao Wang
2018-01-10  1:23                                     ` [dpdk-dev] [PATCH v10 5/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 4/5] vhost: use lib API to make RARP packet Xiao Wang
2018-01-09 16:09                                 ` [dpdk-dev] [PATCH v9 5/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 4/5] vhost: use lib API to make RARP packet Xiao Wang
2018-01-09 13:26                             ` [dpdk-dev] [PATCH v8 5/5] net/virtio: support GUEST ANNOUNCE Xiao Wang
2018-01-09 14:38                             ` [dpdk-dev] [PATCH v8 0/5] " Maxime Coquelin
2018-01-07 12:05                     ` [dpdk-dev] [PATCH v6 3/3] " Xiao Wang
2018-01-05 20:27               ` [dpdk-dev] [PATCH v4 2/3] net/virtio: add packet injection method Stephen Hemminger
2018-01-06  4:41                 ` Tiwei Bie
2018-01-04 15:59             ` [dpdk-dev] [PATCH v4 3/3] net/virtio: support GUEST ANNOUNCE Xiao Wang
2018-01-04 11:13               ` Tiwei Bie
2017-11-24 11:04 ` [dpdk-dev] [PATCH 2/2] " Xiao Wang
2017-11-24  6:04   ` Tiwei Bie
2017-11-30  2:37     ` Wang, Xiao W
2017-11-27 12:48   ` Yuanhan Liu
2017-11-30  2:41     ` Wang, Xiao W
2017-12-05 14:26       ` Yuanhan Liu
2018-01-03  1:43         ` Wang, Xiao W

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).