From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id 244E4C568 for ; Fri, 29 Jan 2016 05:57:30 +0100 (CET) Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by orsmga102.jf.intel.com with ESMTP; 28 Jan 2016 20:57:30 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.22,362,1449561600"; d="scan'208";a="891622475" Received: from yliu-dev.sh.intel.com ([10.239.66.49]) by fmsmga001.fm.intel.com with ESMTP; 28 Jan 2016 20:57:30 -0800 From: Yuanhan Liu To: dev@dpdk.org Date: Fri, 29 Jan 2016 12:58:01 +0800 Message-Id: <1454043483-24579-7-git-send-email-yuanhan.liu@linux.intel.com> X-Mailer: git-send-email 1.9.0 In-Reply-To: <1454043483-24579-1-git-send-email-yuanhan.liu@linux.intel.com> References: <1450321921-27799-1-git-send-email-yuanhan.liu@linux.intel.com> <1454043483-24579-1-git-send-email-yuanhan.liu@linux.intel.com> Cc: "Michael S. Tsirkin" , Victor Kaplansky Subject: [dpdk-dev] [PATCH v3 6/8] vhost: handle VHOST_USER_SEND_RARP request X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 29 Jan 2016 04:57:31 -0000 In the former patch we enabled the GUEST_ANNOUNCE feature, so that the guest OS broadcasts a GARP message after migration to notify the switch about the new location of the migrated VM. However, GUEST_ANNOUNCE is only available since kernel v3.5. For older kernels, the VHOST_USER_SEND_RARP request comes to the rescue. The payload of this new request is the MAC address of the migrated VM; with that, we can construct a RARP message and then broadcast it to the host interfaces.
This is how the patch works: - list all interfaces, with the help of the SIOCGIFCONF ioctl command - construct a RARP message and broadcast it Cc: Thibaut Collet Signed-off-by: Yuanhan Liu --- Note that this patch did take effect in my test: - it indeed updated the target vswitch's MAC learning table (verified with the "ovs fdb/show bridge" command) - the ping request packets after migration indeed flowed to the target (but not the source) host's vswitch (verified with the tcpdump command) However, I still saw ping loss. I asked Thibaut, the original author of the VHOST_USER_SEND_RARP request, for help; he suggested that it might be an issue with my network topology or OVS settings, which is likely, given what I observed above. Anyway, I'd like to send this out; hopefully someone knows what's wrong there, if anything. In the meantime, I will do more debugging. --- lib/librte_vhost/vhost_user/vhost-net-user.c | 4 + lib/librte_vhost/vhost_user/vhost-net-user.h | 1 + lib/librte_vhost/vhost_user/virtio-net-user.c | 125 ++++++++++++++++++++++++++ lib/librte_vhost/vhost_user/virtio-net-user.h | 5 +- 4 files changed, 134 insertions(+), 1 deletion(-) diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c index 32ad6f6..cb18396 100644 --- a/lib/librte_vhost/vhost_user/vhost-net-user.c +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c @@ -100,6 +100,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", + [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", }; /** @@ -437,6 +438,9 @@ vserver_message_handler(int connfd, void *dat, int *remove) case VHOST_USER_SET_VRING_ENABLE: user_set_vring_enable(ctx, &msg.payload.state); break; + case VHOST_USER_SEND_RARP: + user_send_rarp(&msg); + break; default: break; diff --git 
a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h index 6d252a3..e3bb413 100644 --- a/lib/librte_vhost/vhost_user/vhost-net-user.h +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h @@ -67,6 +67,7 @@ typedef enum VhostUserRequest { VHOST_USER_SET_PROTOCOL_FEATURES = 16, VHOST_USER_GET_QUEUE_NUM = 17, VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, VHOST_USER_MAX } VhostUserRequest; diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c index 0f3b163..cda330d 100644 --- a/lib/librte_vhost/vhost_user/virtio-net-user.c +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c @@ -34,11 +34,18 @@ #include #include #include +#include #include #include #include #include #include +#include +#include +#include +#include +#include +#include #include #include @@ -413,3 +420,121 @@ user_set_log_base(struct vhost_device_ctx ctx, return 0; } + +#define RARP_BUF_SIZE 64 + +static void +make_rarp_packet(uint8_t *buf, uint8_t *mac) +{ + struct ether_header *eth_hdr; + struct ether_arp *rarp; + + /* Ethernet header. */ + eth_hdr = (struct ether_header *)buf; + memset(&eth_hdr->ether_dhost, 0xff, ETH_ALEN); + memcpy(&eth_hdr->ether_shost, mac, ETH_ALEN); + eth_hdr->ether_type = htons(ETH_P_RARP); + + /* RARP header. 
*/ + rarp = (struct ether_arp *)(eth_hdr + 1); + rarp->ea_hdr.ar_hrd = htons(ARPHRD_ETHER); + rarp->ea_hdr.ar_pro = htons(ETHERTYPE_IP); + rarp->ea_hdr.ar_hln = ETH_ALEN; + rarp->ea_hdr.ar_pln = 4; + rarp->ea_hdr.ar_op = htons(ARPOP_RREQUEST); + + memcpy(&rarp->arp_sha, mac, ETH_ALEN); + memset(&rarp->arp_spa, 0x00, 4); + memcpy(&rarp->arp_tha, mac, 6); + memset(&rarp->arp_tpa, 0x00, 4); +} + + +static void +send_rarp(const char *ifname, uint8_t *rarp) +{ + int fd; + struct ifreq ifr; + struct sockaddr_ll addr; + + fd = socket(AF_PACKET, SOCK_RAW, 0); + if (fd < 0) { + perror("socket failed"); + return; + } + + memset(&ifr, 0, sizeof(struct ifreq)); + strncpy(ifr.ifr_name, ifname, IFNAMSIZ); + if (ioctl(fd, SIOCGIFINDEX, &ifr) < 0) { + perror("failed to get interface index"); + close(fd); + return; + } + + addr.sll_ifindex = ifr.ifr_ifindex; + addr.sll_halen = ETH_ALEN; + + if (sendto(fd, rarp, RARP_BUF_SIZE, 0, + (const struct sockaddr*)&addr, sizeof(addr)) < 0) { + perror("send rarp packet failed"); + } +} + + +/* + * Broadcast a RARP message to all interfaces, to update + * switch's mac table + */ +int +user_send_rarp(struct VhostUserMsg *msg) +{ + uint8_t *mac = (uint8_t *)&msg->payload.u64; + uint8_t rarp[RARP_BUF_SIZE]; + struct ifconf ifc = {0, }; + struct ifreq *ifr; + int nr = 16; + int fd; + uint32_t i; + + RTE_LOG(DEBUG, VHOST_CONFIG, + ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); + + make_rarp_packet(rarp, mac); + + /* + * Get all interfaces + */ + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) { + perror("failed to create AF_INET socket"); + return -1; + } + +again: + ifc.ifc_len = sizeof(*ifr) * nr; + ifc.ifc_buf = realloc(ifc.ifc_buf, ifc.ifc_len); + + if (ioctl(fd, SIOCGIFCONF, &ifc) < 0) { + perror("failed at SIOCGIFCONF"); + close(fd); + return -1; + } + + if (ifc.ifc_len == (int)sizeof(struct ifreq) * nr) { + /* + * current ifc_buf is not big enough to hold + * all interfaces; double it and try 
again. + */ + nr *= 2; + goto again; + } + + ifr = (struct ifreq *)ifc.ifc_buf; + for (i = 0; i < ifc.ifc_len / sizeof(struct ifreq); i++) + send_rarp(ifr[i].ifr_name, rarp); + + close(fd); + + return 0; +} diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h index 013cf38..1e9ff9a 100644 --- a/lib/librte_vhost/vhost_user/virtio-net-user.h +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h @@ -38,8 +38,10 @@ #include "vhost-net-user.h" #define VHOST_USER_PROTOCOL_F_MQ 0 +#define VHOST_USER_PROTOCOL_F_RARP 2 -#define VHOST_USER_PROTOCOL_FEATURES (1ULL << VHOST_USER_PROTOCOL_F_MQ) +#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ + (1ULL << VHOST_USER_PROTOCOL_F_RARP)) int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *); @@ -50,6 +52,7 @@ void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *); void user_set_protocol_features(struct vhost_device_ctx ctx, uint64_t protocol_features); int user_set_log_base(struct vhost_device_ctx ctx, struct VhostUserMsg *); +int user_send_rarp(struct VhostUserMsg *); int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *); -- 1.9.0