DPDK usage discussions
From: dfrolov@ubic.tech
To: users@dpdk.org
Subject: [dpdk-users] i40e driver Traffic Management API problem
Date: Wed, 15 May 2019 21:02:49 +0300
Message-ID: <484121557943369@sas1-0a6c2e2b59d7.qloud-c.yandex.net>

Good day.
Please help me with the Traffic Management API.

I am trying to implement rate limiters in the TX direction, one per traffic class. The traffic class is determined by internal rules, which are not relevant to this question.

First approach.

===

First of all, I tried to do this with the rte_eth_set_queue_rate_limit() function, but on the i40e driver it does not work properly. To begin with, I could not get the link speed set correctly: even when I set it according to the API (ETH_LINK_SPEED_40G in the port configuration; I tried different values), debugging shows that rte_eth_set_queue_rate_limit() compares the requested rate against the real link bandwidth, and that real bandwidth stays zero.

Past that check, which I cannot get through, the next step is a call to the driver-specific function that should actually program the rate limiter on the TX queue. For i40e, that function pointer is NULL (0x0). This leads me to conclude that rte_eth_set_queue_rate_limit() is not implemented for the i40e driver.
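
For reference, this is the shape of the call I mean (a minimal sketch; port 0, queue 0 and the 500 Mbps value are just placeholders for my setup):

	/* tx_rate is in Mbps; on i40e this fails because the driver
	 * does not provide the set_queue_rate_limit callback */
	int ret = rte_eth_set_queue_rate_limit(0 /* port */, 0 /* queue */, 500 /* Mbps */);
	if (ret != 0)
		printf("rate limit on queue 0 failed: %d\n", ret);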

Second approach.
=======
I spent several days setting up a Traffic Management configuration on the i40e driver in order to run this test.

My node topology was the following:

TC_NODE_1 (with shaper_1) -> ROOT_NODE
TC_NODE_2 (with shaper_2) -> ROOT_NODE
TC_NODE_3 (with shaper_3) -> ROOT_NODE
TC_NODE_4 (with shaper_4) -> ROOT_NODE

Queue_node_1 -> TC_NODE_1
Queue_node_2 -> TC_NODE_2
Queue_node_3 -> TC_NODE_3
Queue_node_4 -> TC_NODE_4

First of all, I must say that TM on i40e does not work without DCB. As I understand it, DCB and TM share a common pool of queues. What I don't understand is why I need to enable DCB on the RX side of the interface when I am configuring TM on TX. Do I have to enable 4 RX queues if I want to use 4 queue nodes (see the sketch below)?

With one RX queue I get an error during initialization: Initializing port 0... i40e_vsi_update_queue_mapping(): number of queues is less that tcs

Is this expected behavior or a bug?
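
What I mean is something like the following (a sketch; that the RX queue count must match the number of TCs is only my reading of that error message, I did not find it documented):

	/* with ETH_4_TCS in dcb_rx_conf, configure 4 RX queues
	 * (one per TC) alongside the 4 TX queues for the queue nodes */
	ret = rte_eth_dev_configure(portid, 4 /* nb_rx_q */, 4 /* nb_tx_q */, &port_conf);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n", ret, portid);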

Results:

I used iperf to test the rate limiters.

The tests show that, in the topology above, only the shaper of the last registered TC node takes effect. When I change the order in which the TC nodes are registered, the experiment shows a different rate limiter becoming active. Moreover, all queues share that single rate limiter: one flow decreases the speed of another.

I can't find a working configuration, so I am attaching my code below.
Could anybody say whether this is my mistake, or is this functionality just not fully implemented yet?
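
One way to check which shaper is actually applied would be to read per-node byte counters through the TM API (a sketch; I have not verified that i40e implements rte_tm_node_stats_read()):

	/* read RTE_TM_STATS_N_BYTES from one TC node; node ids follow
	 * the scheme in main.c below (root_node_id + 1 + tc) */
	struct rte_tm_node_stats stats;
	uint64_t stats_mask = 0;
	int rc = rte_tm_node_stats_read(portid, root_node_id + 1 + tc,
			&stats, &stats_mask, 0 /* don't clear */, &tm_error);
	if (rc == 0)
		printf("TC %d: %" PRIu64 " bytes\n", tc, stats.n_bytes);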

main.c 
==================

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <netinet/in.h>
#include <setjmp.h>
#include <stdarg.h>
#include <ctype.h>
#include <errno.h>
#include <getopt.h>
#include <signal.h>
#include <stdbool.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_tm.h>



static volatile bool force_quit;

/* MAC updating enabled by default */
//static int mac_updating = 1;

#define NELEMS(x)  (sizeof(x) / sizeof((x)[0]))

#define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1

#define NB_MBUF   8192

#define MAX_PKT_BURST 32
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
#define MEMPOOL_CACHE_SIZE 256

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512
static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

/* ethernet addresses of ports */
static struct ether_addr l2fwd_ports_eth_addr[RTE_MAX_ETHPORTS];

/* mask of enabled ports */
static uint32_t l2fwd_enabled_port_mask = 0;

/* list of enabled ports */
static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS];

//static unsigned int l2fwd_rx_queue_per_lcore = 1;

#define MAX_RX_QUEUE_PER_LCORE 16
#define MAX_TX_QUEUE_PER_PORT 16
struct lcore_queue_conf {
	int rx_port;
	//unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE];
} __rte_cache_aligned;
struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];

static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];

struct COS_PARAM {
	uint64_t bw_Mps;
	uint64_t buff_dur_ms;
	uint32_t shaper_id;
	uint8_t queue_num;
	struct rte_tm_shaper_params shaper_param;
};

const int COS_COUNT = 4;

static const struct rte_eth_conf port_conf = {
	.rxmode = {
		.split_hdr_size = 0,
		.mq_mode = ETH_MQ_RX_DCB,
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
	
	.rx_adv_conf = {
		/* not used when mq_mode is ETH_MQ_RX_DCB */
		.vmdq_dcb_conf = {
			.nb_queue_pools = ETH_8_POOLS,
		},
		.dcb_rx_conf = {
			.nb_tcs = ETH_4_TCS,
			/* traffic class each user priority is mapped to */
			.dcb_tc = {0, 1, 2, 3},
		},
	},
	/*
	.tx_adv_conf = {
		.vmdq_dcb_tx_conf = {
			.nb_queue_pools = ETH_8_POOLS,
			.dcb_tc = {0},
		},
	},
	*/
	
};

struct rte_mempool * l2fwd_pktmbuf_pool = NULL;

/* Per-port statistics struct */
struct l2fwd_port_statistics {
	uint64_t tx;
	uint64_t rx;
	uint64_t dropped;
} __rte_cache_aligned;
struct l2fwd_port_statistics port_statistics[RTE_MAX_ETHPORTS];

#define MAX_TIMER_PERIOD 86400 /* 1 day max */
/* A tsc-based timer responsible for triggering statistics printout */
static uint64_t timer_period = 1; /* default period is 1 second */

static struct rte_tm_error tm_error;

/* Print out statistics on packets dropped */
static void
print_stats(void)
{
	uint64_t total_packets_dropped, total_packets_tx, total_packets_rx;
	unsigned portid;

	total_packets_dropped = 0;
	total_packets_tx = 0;
	total_packets_rx = 0;

	const char clr[] = { 27, '[', '2', 'J', '\0' };
	const char topLeft[] = { 27, '[', '1', ';', '1', 'H','\0' };

		/* Clear screen and move to top left */
	printf("%s%s", clr, topLeft);

	printf("\nPort statistics ====================================");

	for (portid = 0; portid < 2; portid++) {

		printf("\nStatistics for port %u ------------------------------"
			   "\nPackets sent: %24"PRIu64
			   "\nPackets received: %20"PRIu64
			   "\nPackets dropped: %21"PRIu64,
			   portid,
			   port_statistics[portid].tx,
			   port_statistics[portid].rx,
			   port_statistics[portid].dropped);

		total_packets_dropped += port_statistics[portid].dropped;
		total_packets_tx += port_statistics[portid].tx;
		total_packets_rx += port_statistics[portid].rx;
	}
	printf("\nAggregate statistics ==============================="
		   "\nTotal packets sent: %18"PRIu64
		   "\nTotal packets received: %14"PRIu64
		   "\nTotal packets dropped: %15"PRIu64,
		   total_packets_tx,
		   total_packets_rx,
		   total_packets_dropped);
	printf("\n====================================================\n");
}

/* main processing loop */
static void
l2fwd_main_loop(void)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	struct rte_mbuf *m;
	unsigned lcore_id;
	uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc;
	int j, nb_rx;
	struct lcore_queue_conf *qconf;
	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S *
			BURST_TX_DRAIN_US;
	struct rte_eth_dev_tx_buffer *buffer;

	prev_tsc = 0;
	timer_tsc = 0;

	lcore_id = rte_lcore_id();

	qconf = &lcore_queue_conf[lcore_id];

	if (qconf->rx_port < 0) {
		RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id);
		return;
	}

	RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u\n", lcore_id);

	int tx_port = 1 - qconf->rx_port; /* forward to the other of the two ports */
	int rx_port = qconf->rx_port;
	while (!force_quit) {

			cur_tsc = rte_rdtsc();

			/*
			 * TX burst queue drain
			 */
			diff_tsc = cur_tsc - prev_tsc;
			if (unlikely(diff_tsc > drain_tsc)) {

				buffer = tx_buffer[tx_port];

				/* NB: one tx buffer is shared by all queues of the
				 * port, so only the first flush in this loop can
				 * actually send anything */
				for (int cos = 0; cos < COS_COUNT; cos++) {
					int sent;
					sent = rte_eth_tx_buffer_flush(tx_port, cos, buffer);
					if (sent)
						port_statistics[tx_port].tx += sent;
				}

				/* if timer is enabled */
				if (timer_period > 0) {

					/* advance the timer */
					timer_tsc += diff_tsc;

					/* if timer has reached its timeout */
					if (unlikely(timer_tsc >= timer_period)) {

						/* do this only on master core */
						if (lcore_id == rte_get_master_lcore()) {
							print_stats();
							/* reset the timer */
							timer_tsc = 0;
						}
					}
				}

				prev_tsc = cur_tsc;
			}

			/*
			 * Read packet from RX queues
			 */

				nb_rx = rte_eth_rx_burst(rx_port, 0,
							 pkts_burst, MAX_PKT_BURST);

				port_statistics[rx_port].rx += nb_rx;

				for (j = 0; j < nb_rx; j++) {
					m = pkts_burst[j];
					rte_prefetch0(rte_pktmbuf_mtod(m, void *));

					/* UDP destination port of an untagged IPv4 packet:
					 * byte offset 36 (14B Ethernet + 20B IP + 2B) */
					uint16_t *data_2b = rte_pktmbuf_mtod(m, uint16_t *);
					uint16_t udp_dst_port = ntohs(data_2b[18]);

					/* iperf flows use dst ports 2000..2003; clamp to a
					 * valid queue in case of other traffic */
					uint8_t queue_out = (uint16_t)(udp_dst_port - 2000) % COS_COUNT;

					int sent = rte_eth_tx_buffer(tx_port, queue_out, tx_buffer[tx_port], m);
					if (sent)
						port_statistics[tx_port].tx += sent;
				}
			
		}
}

static int
l2fwd_launch_one_lcore(__attribute__((unused)) void *dummy)
{
	l2fwd_main_loop();
	return 0;
}


static void
signal_handler(int signum)
{
	if (signum == SIGINT || signum == SIGTERM) {
		printf("\n\nSignal %d received, preparing to exit...\n",
				signum);
		force_quit = true;
	}
}


static struct rte_tm_shaper_params get_shaper(uint64_t bw_Mbps, uint64_t buf_dur_ms)
{
	const int bw_Mbps_2_Bps = 1000000 / 8; /* Mbps -> bytes/sec */

	/* buf_dur_ms would set .peak.size (about 10000000/8 bytes per ms
	 * on 10GE), but buffer size is not supported, so it is ignored */
	(void)buf_dur_ms;

	struct rte_tm_shaper_params res_shaper = {
		/* .committed = not supported !!! */
		.peak = { .rate = bw_Mbps * bw_Mbps_2_Bps, .size = 0 },
		.pkt_length_adjust = 0, /* not supported !!! */
	};
	return res_shaper;
}

int
main(int argc, char **argv)
{
	
	/* TX queue -> TC node mapping */
	int COS_Q_2_TC[] = {2, 0, 1, 3};
	
	//================================== CONFIGURE COS ==========>>>
	

	struct COS_PARAM cos_arr[COS_COUNT]; /* four classes of service */

	cos_arr[0].shaper_param = get_shaper(500, 0);  /* 500 Mbps */
	cos_arr[1].shaper_param = get_shaper(5000, 0); /* 5000 Mbps */
	cos_arr[2].shaper_param = get_shaper(1000, 0); /* 1000 Mbps */
	cos_arr[3].shaper_param = get_shaper(300, 0);  /* 300 Mbps */


	//================================== CONFIGURE COS ==========|||
	
	
	int ret;
	uint16_t nb_ports;
	uint16_t portid;
	unsigned lcore_id, rx_lcore_id;
	//unsigned nb_ports_in_mask = 0;

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
	argc -= ret;
	argv += ret;

	force_quit = false;
	signal(SIGINT, signal_handler);
	signal(SIGTERM, signal_handler);
	
	for (int i = 0 ; i < RTE_MAX_LCORE ; i++)
	{
		lcore_queue_conf[i].rx_port = -1;
	}

	/* convert to number of cycles */
	timer_period *= rte_get_timer_hz();

	/* create the mbuf pool */
	l2fwd_pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF,
		MEMPOOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
		rte_socket_id());
	if (l2fwd_pktmbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");

	nb_ports = rte_eth_dev_count_avail();
	if (nb_ports == 0)
		rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");

	/* reset l2fwd_dst_ports */
	for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++)
		l2fwd_dst_ports[portid] = 1-portid;
	//last_port = 0;

	rx_lcore_id = 0;
	//qconf = NULL;
	
	
	/* Initialize the port/queue configuration of each logical core */
	for (portid = 0; portid < nb_ports; portid++) {

		lcore_queue_conf[rx_lcore_id].rx_port = portid; 
		
		printf("Lcore %u: RX port %u\n", rx_lcore_id, portid);
		rx_lcore_id = rte_get_next_lcore(rx_lcore_id, true, true);
	}


	/* Initialise each port */
	for (portid = 0; portid < nb_ports; portid++) {

		/* init port */
		printf("Initializing port %u... ", portid);
		fflush(stdout);
		ret = rte_eth_dev_configure(portid, 1, COS_COUNT, &port_conf); /* 1 RX queue, COS_COUNT TX queues */
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n",
				  ret, portid);

		rte_eth_macaddr_get(portid, &l2fwd_ports_eth_addr[portid]);
		fflush(stdout);
		
		
		
		//===================================== CONFIGURE COS ON NIC ==========>>>
		
		
		/* single RX queue (queue 0) */
		ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
				rte_eth_dev_socket_id(portid),
				NULL,
				l2fwd_pktmbuf_pool);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u", ret, portid);

		fflush(stdout);
		for (int cos_i = 0; cos_i < COS_COUNT; cos_i++) {
			cos_arr[cos_i].shaper_id = (uint32_t)cos_i;
			if (rte_tm_shaper_profile_add(portid, cos_arr[cos_i].shaper_id, &cos_arr[cos_i].shaper_param, &tm_error) != 0) {
				rte_exit(EXIT_FAILURE, "can't register shaper for port %d, cos queue %d, reason: %s\n",
					   portid, cos_i, tm_error.message);
			}
		}
		
		enum rte_tm_stats_type TM_NODE_STATS_MASK = RTE_TM_STATS_N_BYTES;

		struct rte_tm_capabilities tm_cap;
		int status;

		/* dispatches to i40e_tm_capabilities_get() in the driver */
		status = rte_tm_capabilities_get(portid, &tm_cap, &tm_error);
		if (status != 0) {
			rte_exit(EXIT_FAILURE, "port#%d, rte_tm_capabilities error: %s", portid, tm_error.message);
		}
		
		
		uint32_t root_node_id = 1000;
		struct rte_tm_node_params root_node_param = {
			.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE,
			.nonleaf = {
				.wfq_weight_mode = NULL,
				.n_sp_priorities = 1,
			},
			.stats_mask = TM_NODE_STATS_MASK,
		};

		/* root node: no parent, priority 0, weight 1, level 0 */
		status = rte_tm_node_add(portid,
					root_node_id,
					RTE_TM_NODE_ID_NULL,
					0,
					1,
					0,
					&root_node_param,
					&tm_error);
		if (status != 0) {
			rte_exit(EXIT_FAILURE, "port#%d, root_node error: %s", portid, tm_error.message);
		}
		 
		 
		/* init COS_COUNT TX queues on each port (one per queue node) */
		for (int cos_i = 0; cos_i < COS_COUNT; cos_i++) {
			ret = rte_eth_tx_queue_setup(portid, cos_i, nb_txd,
					rte_eth_dev_socket_id(portid),
					NULL);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n",
					ret, portid);
		}
		 
		for (int cos_i = 0; cos_i < COS_COUNT; cos_i++) {

			struct rte_tm_node_params TC_node_params = {
				.shaper_profile_id = cos_arr[cos_i].shaper_id,
				.nonleaf = {
					.wfq_weight_mode = NULL,
					.n_sp_priorities = 1,
				},
				.stats_mask = TM_NODE_STATS_MASK,
			};

			/* TC node: child of the root, priority 0, weight 1, level 1 */
			status = rte_tm_node_add(portid,
						cos_i + 1 + root_node_id,
						root_node_id,
						0,
						1,
						1,
						&TC_node_params,
						&tm_error);
			if (status != 0) {
				rte_exit(EXIT_FAILURE, "port#%d, TC_node#%d error: %s", portid, cos_i, tm_error.message);
			}
		}
			
		for (int cos_i = 0; cos_i < (int)NELEMS(COS_Q_2_TC); cos_i++) {
			struct rte_tm_node_params queue_node_params = {
				.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE,
				.leaf = {
					.cman = RTE_TM_CMAN_TAIL_DROP, /* !!! only this is supported */
					.wred = {
						.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE /* !!! WRED is NOT SUPPORTED */
					}
				},
				.stats_mask = TM_NODE_STATS_MASK,
			};

			/* queue node: leaf, node_id == TX queue id, level 2 */
			status = rte_tm_node_add(portid,
						cos_i,
						COS_Q_2_TC[cos_i] + 1 + root_node_id,
						0,
						1,
						2,
						&queue_node_params,
						&tm_error);
			if (status != 0) {
				rte_exit(EXIT_FAILURE, "port#%d, queue_node#%d error: %s", portid, cos_i, tm_error.message);
			}
		}

		/* clear_on_fail = 1: wipe the hierarchy if the commit fails */
		status = rte_tm_hierarchy_commit(portid, 1, &tm_error);
		if (status != 0) {
			rte_exit(EXIT_FAILURE, "port#%d, traffic management hierarchy commit error: %s", portid, tm_error.message);
		}
		 
		//===================================== CONFIGURE COS ON NIC ==========|||
		
		/* Initialize TX buffers */
		tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
				RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
				rte_eth_dev_socket_id(portid));
		if (tx_buffer[portid] == NULL)
			rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n",
					portid);

		rte_eth_tx_buffer_init(tx_buffer[portid], MAX_PKT_BURST);

		ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[portid],
				rte_eth_tx_buffer_count_callback,
				&port_statistics[portid].dropped);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
			"Cannot set error callback for tx buffer on port %u\n",
				 portid);

		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
				  ret, portid);

		printf("done: \n");

		rte_eth_promiscuous_enable(portid);

		printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
				portid,
				l2fwd_ports_eth_addr[portid].addr_bytes[0],
				l2fwd_ports_eth_addr[portid].addr_bytes[1],
				l2fwd_ports_eth_addr[portid].addr_bytes[2],
				l2fwd_ports_eth_addr[portid].addr_bytes[3],
				l2fwd_ports_eth_addr[portid].addr_bytes[4],
				l2fwd_ports_eth_addr[portid].addr_bytes[5]);

		/* initialize port stats */
		memset(&port_statistics, 0, sizeof(port_statistics));
	}
	//check_all_ports_link_status(nb_ports, 0xff);

	ret = 0;
	/* launch per-lcore init on every lcore */
	rte_eal_mp_remote_launch(l2fwd_launch_one_lcore, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0) {
			ret = -1;
			break;
		}
	}

	/* l2fwd_enabled_port_mask is never set in this test, so close
	 * all available ports unconditionally */
	for (portid = 0; portid < nb_ports; portid++) {
		printf("Closing port %d...", portid);
		rte_eth_dev_stop(portid);
		rte_eth_dev_close(portid);
		printf(" Done\n");
	}
	printf("Bye...\n");

	return ret;
}
