DPDK patches and discussions
 help / color / mirror / Atom feed
From: Shahaf Shuler <shahafs@mellanox.com>
To: nelio.laranjeiro@6wind.com, adrien.mazarguil@6wind.com
Cc: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v2 0/2] mlx5 high latency observed on send operations
Date: Sun, 27 Aug 2017 09:47:07 +0300	[thread overview]
Message-ID: <cover.1503816096.git.shahafs@mellanox.com> (raw)
In-Reply-To: <1503301622-14220-1-git-send-email-sagi@grimberg.me>

from sagi@grimberg.me:

While measuring the latency of a latency-critical workload on the mlx5 PMD, we noticed that high latency can occur because the doorbell record update is flushed late.

This can be reproduced using the simple program [1] against testpmd macswap fwd mode. This utility sends a raw ethernet frame to the dpdk port and measures the time between send and the received mirrored frame.

This patchset guarantees immediate visibility of doorbell updates by mapping the doorbell as non-cacheable memory.
In addition, we relax the memory barrier for dma-able memory.

Without this fix the tsc delta was 3550760-5993019 cycles (which translates to 2-6 ms on 1.7 GHz processor).

With the fix applied the tsc delta reduced to 17740-29663 (which translates to 9-17 us).

on v2:
 * replace compiler barrier with rte_io_wmb.

Shahaf Shuler (2):
  net/mlx5: replace memory barrier type
  net/mlx5: don't map doorbell register to write combining

 drivers/net/mlx5/mlx5.c      | 2 ++
 drivers/net/mlx5/mlx5_rxtx.h | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

[1]:
/*
 * compiling: gcc test.c -o test
 * run using: ./test <local_iface> <dest_mac>
 */
#include <arpa/inet.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/if_packet.h>
#include <net/if.h>
#include <netinet/ether.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

#define BUF_SIZ		1024

/*
 * Read the x86 time-stamp counter.
 *
 * Executes the RDTSC instruction, which returns the low 32 bits of the
 * counter in EAX and the high 32 bits in EDX, and combines both halves
 * into one 64-bit value. x86/x86-64 only.
 *
 * Returns the current 64-bit TSC value.
 */
static inline uint64_t rte_rdtsc(void)
{
	uint32_t lo;
	uint32_t hi;

	/*
	 * __asm__/__volatile__ are the spellings accepted in strict ISO
	 * modes (-std=c99/-std=c11), where plain `asm` is not a keyword.
	 */
	__asm__ __volatile__("rdtsc" :
			     "=a" (lo),
			     "=d" (hi));

	/* Combine explicitly rather than relying on union layout. */
	return ((uint64_t)hi << 32) | lo;
}

int main(int argc, char *argv[])
{
	int sockfd;
	struct ifreq if_idx;
	struct ifreq if_mac;
	int tx_len = 0;
	char sendbuf[BUF_SIZ];
	struct ether_header *eh = (struct ether_header *) sendbuf;
	struct sockaddr_ll socket_address;
	char ifname[IFNAMSIZ];
	int values[6];
	struct ether_header expected;
	uint64_t payload = 0xB16B00B5;
	uint8_t buffer[1024];
	int result;
	uint64_t before_rcv;
	uint64_t after_rcv;
	uint64_t delta;
	int numbytes;

	if (argc != 3) {
		fprintf(stderr, "device name and dest mac\n");
		return -1;
	}

	strcpy(ifname, argv[1]);
	result = sscanf(argv[2], "%x:%x:%x:%x:%x:%x",
			&values[0], &values[1], &values[2], &values[3], &values[4], &values[5]);
	if (result != 6) {
		fprintf(stderr, "invalid mac\n");
		return -1;
	}

	/* Open RAW socket to send on */
	if ((sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL))) == -1) {
	    perror("socket");
	}

	/* Get the index of the interface to send on */
	memset(&if_idx, 0, sizeof(struct ifreq));
	strncpy(if_idx.ifr_name, ifname, IFNAMSIZ-1);
	if (ioctl(sockfd, SIOCGIFINDEX, &if_idx) < 0)
	    perror("SIOCGIFINDEX");
	/* Get the MAC address of the interface to send on */
	memset(&if_mac, 0, sizeof(struct ifreq));
	strncpy(if_mac.ifr_name, ifname, IFNAMSIZ-1);
	if (ioctl(sockfd, SIOCGIFHWADDR, &if_mac) < 0)
	    perror("SIOCGIFHWADDR");

	/* Construct the Ethernet header */
	memset(sendbuf, 0, BUF_SIZ);
	/* Ethernet header */
	eh->ether_shost[0] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[0];
	eh->ether_shost[1] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[1];
	eh->ether_shost[2] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[2];
	eh->ether_shost[3] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[3];
	eh->ether_shost[4] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[4];
	eh->ether_shost[5] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[5];
	eh->ether_dhost[0] = values[0];
	eh->ether_dhost[1] = values[1];
	eh->ether_dhost[2] = values[2];
	eh->ether_dhost[3] = values[3];
	eh->ether_dhost[4] = values[4];
	eh->ether_dhost[5] = values[5];
	/* Ethertype field */
	eh->ether_type = htons(ETH_P_IP);
	tx_len += sizeof(struct ether_header);

	memcpy(&sendbuf[tx_len], &payload, sizeof(payload));
	tx_len += sizeof(payload);

	/* Index of the network device */
	socket_address.sll_ifindex = if_idx.ifr_ifindex;
	/* Address length*/
	socket_address.sll_halen = ETH_ALEN;
	/* Destination MAC */
	socket_address.sll_addr[0] = values[0];
	socket_address.sll_addr[1] = values[1];
	socket_address.sll_addr[2] = values[2];
	socket_address.sll_addr[3] = values[3];
	socket_address.sll_addr[4] = values[4];
	socket_address.sll_addr[5] = values[5];

	memcpy(&expected.ether_dhost, &eh->ether_shost, ETH_ALEN);
	memcpy(&expected.ether_shost, &eh->ether_dhost, ETH_ALEN);
	expected.ether_type = eh->ether_type;


	/* Send packet */
	if (sendto(sockfd, sendbuf, tx_len, 0, (struct sockaddr*)&socket_address, sizeof(struct sockaddr_ll)) < 0) {
	    printf("Send failed\n");
	    return -2;
	}

	before_rcv = rte_rdtsc();
	while (1) {
		numbytes = recvfrom(sockfd, buffer, BUF_SIZ, 0, NULL, NULL);
		if (numbytes <= 0)
			continue;
		after_rcv = rte_rdtsc();

		if (memcmp(&expected, buffer, sizeof(expected)) != 0)
			continue;

		if (memcmp(&payload, &buffer[sizeof(expected)], sizeof(payload)) == 0) {
			break;
		}

	}

	delta =  after_rcv - before_rcv;
	printf("RTT is %lu tsc \n", delta);
	return 0;
}


-- 
2.12.0

  parent reply	other threads:[~2017-08-27  6:47 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-21  7:47 [dpdk-dev] [PATCH " Sagi Grimberg
2017-08-21  7:47 ` [dpdk-dev] [PATCH 1/2] net/mlx5: replace memory barrier type Sagi Grimberg
2017-08-23 11:39   ` Nélio Laranjeiro
2017-08-23 13:11     ` Bruce Richardson
2017-08-24  6:56       ` Shahaf Shuler
2017-08-24  9:27         ` Bruce Richardson
2017-08-21  7:47 ` [dpdk-dev] [PATCH 2/2] net/mlx5: don't map doorbell register to write combining Sagi Grimberg
2017-08-23 11:03   ` Ferruh Yigit
2017-08-23 12:06     ` Nélio Laranjeiro
2017-08-27  6:47 ` Shahaf Shuler [this message]
2017-08-27  6:47   ` [dpdk-dev] [PATCH v2 1/2] net/mlx5: replace memory barrier type Shahaf Shuler
2017-08-27  6:47   ` [dpdk-dev] [PATCH v2 2/2] net/mlx5: don't map doorbell register to write combining Shahaf Shuler
2017-08-29 16:53   ` [dpdk-dev] [PATCH v2 0/2] mlx5 high latency observed on send operations Ferruh Yigit

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cover.1503816096.git.shahafs@mellanox.com \
    --to=shahafs@mellanox.com \
    --cc=adrien.mazarguil@6wind.com \
    --cc=dev@dpdk.org \
    --cc=nelio.laranjeiro@6wind.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).