From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from bombadil.infradead.org (bombadil.infradead.org [65.50.211.133]) by dpdk.org (Postfix) with ESMTP id 07FFB7D97 for ; Mon, 21 Aug 2017 09:47:07 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=bombadil.20170209; h=Message-Id:Date:Subject:Cc:To:From: Sender:Reply-To:MIME-Version:Content-Type:Content-Transfer-Encoding: Content-ID:Content-Description:Resent-Date:Resent-From:Resent-Sender: Resent-To:Resent-Cc:Resent-Message-ID:In-Reply-To:References:List-Id: List-Help:List-Unsubscribe:List-Subscribe:List-Post:List-Owner:List-Archive; bh=kIeXc36ZdwJn8Zvdzp2y6dLxS6r0mlyB3z6l7gvFIG4=; b=OsG8fOOtiJyygRS3rskGGGtNc iGRau8JStpZWA718VaIn5m6JB9eCh+UcDwz3y7VrrNx4dkeoGZA6KlLaVlTu7nrhMkeYdczx50rvX h0vqHq4ZbHixLwhacnnFnzyIaeacR398FuSWVJgcFpwKkbzBVPxC14bUJksqgtUFBJ4hzKn2Htbro vCoYCWfFOtYywJ9d9HoTJcKfJaOiSvSj4qhMwFV7aWK4/LEUKDgozQgIEZ6AvFd6siRVhc8pWZZ77 SeScak4HvLTJOHdpsvngpkCCfOzFWA8LF9SrlOBS62t+hFk5iHhYiqHdnDfiYtXiClKPEqBf3SUBC sIHmFkznw==; Received: from bzq-82-81-101-184.red.bezeqint.net ([82.81.101.184] helo=bombadil.infradead.org) by bombadil.infradead.org with esmtpsa (Exim 4.87 #1 (Red Hat Linux)) id 1djhQQ-0000ZK-6p; Mon, 21 Aug 2017 07:47:06 +0000 From: Sagi Grimberg To: dev@dpdk.org Cc: Nelio Laranjeiro , Adrien Mazarguil Date: Mon, 21 Aug 2017 10:47:00 +0300 Message-Id: <1503301622-14220-1-git-send-email-sagi@grimberg.me> X-Mailer: git-send-email 2.7.4 Subject: [dpdk-dev] [PATCH 0/2] mlx5 high latency observed on send operations X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 21 Aug 2017 07:47:08 -0000 When measuring latency while running a latency-critical workload on the mlx5 PMD, we noticed that high latency can occur due to a delayed flush of the doorbell record update. 
This can be reproduced using the simple program [1] against testpmd macswap fwd mode. This utility sends a raw ethernet frame to the dpdk port and measures the time between send and the received mirrored frame. This patchset guarantees immediate doorbell updates visibility by making the doorbell a non-cacheble memory. In addition, we relax the memory barrier for dma-able memory. Without this fix the tsc delta was 3550760-5993019 cycles (which translates to 2-6 ms on 1.7 GHz processor). With the fix applied the tsc delta reduced to 17740-29663 (wich translates to 9-17 us). Shahaf Shuler (2): net/mlx5: replace memory barrier type net/mlx5: don't map doorbell register to write combining drivers/net/mlx5/mlx5.c | 2 ++ drivers/net/mlx5/mlx5_rxtx.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) [1]: /* * compiling: gcc test.c -o test * run using: ./test */ #include #include #include #include #include #include #include #include #include #define BUF_SIZ 1024 static inline uint64_t rte_rdtsc(void) { union { uint64_t tsc_64; struct { uint32_t lo_32; uint32_t hi_32; }; } tsc; asm volatile("rdtsc" : "=a" (tsc.lo_32), "=d" (tsc.hi_32)); return tsc.tsc_64; } int main(int argc, char *argv[]) { int sockfd; struct ifreq if_idx; struct ifreq if_mac; int tx_len = 0; char sendbuf[BUF_SIZ]; struct ether_header *eh = (struct ether_header *) sendbuf; struct sockaddr_ll socket_address; char ifname[IFNAMSIZ]; int values[6]; struct ether_header expected; uint64_t payload = 0xB16B00B5; uint8_t buffer[1024]; int result; uint64_t before_rcv; uint64_t after_rcv; uint64_t delta; int numbytes; if (argc != 3) { fprintf(stderr, "device name and dest mac\n"); return -1; } strcpy(ifname, argv[1]); result = sscanf(argv[2], "%x:%x:%x:%x:%x:%x", &values[0], &values[1], &values[2], &values[3], &values[4], &values[5]); if (result != 6) { fprintf(stderr, "invalid mac\n"); return -1; } /* Open RAW socket to send on */ if ((sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL))) == -1) { 
perror("socket"); } /* Get the index of the interface to send on */ memset(&if_idx, 0, sizeof(struct ifreq)); strncpy(if_idx.ifr_name, ifname, IFNAMSIZ-1); if (ioctl(sockfd, SIOCGIFINDEX, &if_idx) < 0) perror("SIOCGIFINDEX"); /* Get the MAC address of the interface to send on */ memset(&if_mac, 0, sizeof(struct ifreq)); strncpy(if_mac.ifr_name, ifname, IFNAMSIZ-1); if (ioctl(sockfd, SIOCGIFHWADDR, &if_mac) < 0) perror("SIOCGIFHWADDR"); /* Construct the Ethernet header */ memset(sendbuf, 0, BUF_SIZ); /* Ethernet header */ eh->ether_shost[0] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[0]; eh->ether_shost[1] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[1]; eh->ether_shost[2] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[2]; eh->ether_shost[3] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[3]; eh->ether_shost[4] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[4]; eh->ether_shost[5] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[5]; eh->ether_dhost[0] = values[0]; eh->ether_dhost[1] = values[1]; eh->ether_dhost[2] = values[2]; eh->ether_dhost[3] = values[3]; eh->ether_dhost[4] = values[4]; eh->ether_dhost[5] = values[5]; /* Ethertype field */ eh->ether_type = htons(ETH_P_IP); tx_len += sizeof(struct ether_header); memcpy(&sendbuf[tx_len], &payload, sizeof(payload)); tx_len += sizeof(payload); /* Index of the network device */ socket_address.sll_ifindex = if_idx.ifr_ifindex; /* Address length*/ socket_address.sll_halen = ETH_ALEN; /* Destination MAC */ socket_address.sll_addr[0] = values[0]; socket_address.sll_addr[1] = values[1]; socket_address.sll_addr[2] = values[2]; socket_address.sll_addr[3] = values[3]; socket_address.sll_addr[4] = values[4]; socket_address.sll_addr[5] = values[5]; memcpy(&expected.ether_dhost, &eh->ether_shost, ETH_ALEN); memcpy(&expected.ether_shost, &eh->ether_dhost, ETH_ALEN); expected.ether_type = eh->ether_type; /* Send packet */ if (sendto(sockfd, sendbuf, tx_len, 0, (struct sockaddr*)&socket_address, sizeof(struct sockaddr_ll)) < 0) { printf("Send failed\n"); 
return -2; } before_rcv = rte_rdtsc(); while (1) { numbytes = recvfrom(sockfd, buffer, BUF_SIZ, 0, NULL, NULL); if (numbytes <= 0) continue; after_rcv = rte_rdtsc(); if (memcmp(&expected, buffer, sizeof(expected)) != 0) continue; if (memcmp(&payload, &buffer[sizeof(expected)], sizeof(payload)) == 0) { break; } } delta = after_rcv - before_rcv; printf("RTT is %lu tsc \n", delta); return 0; } -- 2.7.4