Hello, I'm trying to use DPDK for a kernel bypass networking solution at my new job. I already managed to get it to forward packets that I sent it from a remote machine, using the basic forwarding example app, but now I'm trying to craft my own packets from scratch and get tx_burst() to send them to my remote machine. I stayed till 5 AM last night trying to get it working, but I couldn't. Here is my function that takes a pointer to an mbuf, allocates memory for it from the initialized memory pool, proceeds to fill out Ethernet, IP and UDP headers, and sets offload flags in mbuf to offload IP and UDP checksum calculation to the network card: void allocate_and_populate_packet_mbuf( struct rte_mbuf** buf_ptr ,struct rte_mempool* mempool) { const uint16_t PAYLOAD_LEN = 128; uint8_t payload[PAYLOAD_LEN]; const uint16_t PACKET_LEN = sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_udp_hdr) + PAYLOAD_LEN ; struct rte_ether_hdr* ether_hdr; struct rte_ipv4_hdr* ipv4_hdr; struct rte_udp_hdr* udp_hdr; char* data; uint8_t* payload_addr; *((uint64_t*)(payload + 0)) = PACKET_ID; memset(payload + 8, 0xF0, PAYLOAD_LEN - 8); *buf_ptr = rte_pktmbuf_alloc(mempool); (*buf_ptr)->data_len = PACKET_LEN; (*buf_ptr)->pkt_len = PACKET_LEN; ether_hdr = rte_pktmbuf_mtod(*buf_ptr, struct rte_ether_hdr *); memset(ether_hdr, 0x00, sizeof(struct rte_ether_hdr)); /* * Destination MAC Address = 0A:40:46:FB:8A:A9 * Source MAC Address (this NIC) = 0A:C9:6E:32:4D:49 */ ether_hdr->dst_addr.addr_bytes[0] = 0x0A; ether_hdr->dst_addr.addr_bytes[1] = 0x40; ether_hdr->dst_addr.addr_bytes[2] = 0x46; ether_hdr->dst_addr.addr_bytes[3] = 0xFB; ether_hdr->dst_addr.addr_bytes[4] = 0x8A; ether_hdr->dst_addr.addr_bytes[5] = 0xA9; ether_hdr->src_addr.addr_bytes[0] = 0x0A; ether_hdr->src_addr.addr_bytes[1] = 0xC9; ether_hdr->src_addr.addr_bytes[2] = 0x6E; ether_hdr->src_addr.addr_bytes[3] = 0x32; ether_hdr->src_addr.addr_bytes[4] = 0x4D; ether_hdr->src_addr.addr_bytes[5] = 0x49; ether_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4); ipv4_hdr = rte_pktmbuf_mtod_offset(*buf_ptr, struct rte_ipv4_hdr *, sizeof(struct rte_ether_hdr)); memset(ipv4_hdr, 0x00, sizeof(struct rte_ipv4_hdr)); ipv4_hdr->total_length = rte_cpu_to_be_16( sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_udp_hdr) + PAYLOAD_LEN ); ipv4_hdr->time_to_live = 255; ipv4_hdr->next_proto_id = IPPROTO_UDP; ipv4_hdr->src_addr = inet_addr("172.31.32.59"); ipv4_hdr->dst_addr = inet_addr("37.63.34.53"); ipv4_hdr->version_ihl = 69; ipv4_hdr->ihl = 5; ipv4_hdr->version = 4; udp_hdr = rte_pktmbuf_mtod_offset( *buf_ptr ,struct rte_udp_hdr * ,sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr) ); udp_hdr->src_port = rte_cpu_to_be_16(54749); udp_hdr->dst_port = rte_cpu_to_be_16(38686); udp_hdr->dgram_len = rte_cpu_to_be_16(sizeof(struct rte_udp_hdr) + PAYLOAD_LEN); /* Also make sure we offload IP checksum and UDP checksum * calculation to our network card instead of manually doing it. */ /* This is how the web wiki page says to do it. */ (*buf_ptr)->l2_len = sizeof(struct rte_ether_hdr); (*buf_ptr)->l3_len = sizeof(struct rte_ipv4_hdr); (*buf_ptr)->ol_flags |= RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM; //(*buf_ptr)->ol_flags = 384; (*buf_ptr)->port = 0; (*buf_ptr)->packet_type = 528; (*buf_ptr)->l2_type = 0; (*buf_ptr)->l3_type = 1; (*buf_ptr)->l4_type = 2; udp_hdr->dgram_cksum = rte_ipv4_phdr_cksum(ipv4_hdr, (*buf_ptr)->ol_flags); /* I don't understand whether I need this call or not. */ /* I can see my payload and headers already populated */ /* by the dumped mbuf, even without this call. */ //data = rte_pktmbuf_append(*buf_ptr, PAYLOAD_LEN); /* if(data == NULL){ printf("\n\n[ERR] DPDK: rte_pktmbuf_append(*buf_ptr, PAYLOAD_LEN) failed!\n\n"); printf("[ERR] Can't construct packet message buffer! Returning.\n\n"); return; } */ rte_memcpy(udp_hdr + sizeof(struct rte_udp_hdr), payload, PAYLOAD_LEN); return; } On return from this function, I call the prepare function on the packet, because the web wiki says that this is the function that looks at the offload flags of the mbuf and calculates the offloaded checksums. (However, when I print the IPv4 header info after the prepare() call, it is still 0 for some reason. Is that normal?) /* * The lcore main. This is the main thread that does the work. * It generates random 128-byte payloads (first 8 bytes store * a constant packet ID so the other side recognizes us) and * continuously transmits them, 1 packet at a time. */ /* Basic sending application lcore. 8< */ static __rte_noreturn void lcore_main(void) { uint16_t port; uint64_t test_counter = 0; uint64_t burst_counter = 0; uint16_t ready_packets = 0; /* * Check that the port is on the same NUMA node as the polling thread * for best performance. */ RTE_ETH_FOREACH_DEV(port) if (rte_eth_dev_socket_id(port) >= 0 && rte_eth_dev_socket_id(port) != (int)rte_socket_id()) printf("WARNING, port %u is on remote NUMA node to " "polling thread.\n\tPerformance will " "not be optimal.\n", port); printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n", rte_lcore_id()); /* Main work of application loop. 8< */ for (;;) { /* Go over each NIC connected to a DPDK driver. */ /* Transmit 1 packet from each NIC. */ /* For now, just 1 NIC is connected, port id = 0. */ RTE_ETH_FOREACH_DEV(port) { /* Note that for now, burst size is 1, we send 1 packet at a time. */ /* Local array of POINTERS to mbuf objects. * Each pointer is returned by a call to rte_pktmbuf_alloc(mempool). * This call is made for each packet that must be sent. Only then * can we start populating the packet's headedrs and payload. */ struct rte_mbuf *bufs[BURST_SIZE]; allocate_and_populate_packet_mbuf(&(bufs[0]), mbuf_pool); /* Check packet correctness and update offloaded checksums. */ ready_packets = rte_eth_tx_prepare(port, 0, bufs, 1); printf("\n\nNumber of packets checked, updated, ready to send: %u\n", ready_packets); /* Dump a packet to a file for inspection (stdout here) */ rte_pktmbuf_dump(stdout, bufs[0], 256); /* Send burst of TX packets from the same NIC. */ const uint16_t nb_tx = rte_eth_tx_burst( port /* NIC ID = 0 */ ,0 /* TX QUEUE ID */ ,bufs /* mbuf** */ ,1 /* packets to transmit */ ); /* Free any unsent packets. */ if (unlikely(nb_tx < 1)) { printf("Warning: Only %u packets transmitted!\n", nb_tx); printf("Manually freeing the mbuf of each unsent packet.\n"); uint16_t buf; for (buf = nb_tx; buf < 1; buf++) rte_pktmbuf_free(bufs[buf]); } } } /* >8 End of loop. */ } I am using DPDK 23.11 here, with the AWS ENA poll mode driver, on Amazon Linux 2023. with igb_uio kernel module. Again, I got DPDK forwarding to work just fine, but that was when I took the mbuf given to me by rx_burst(), swap source and destination MAC and IP addresses and UDP ports, and give the mbuf to tx_burst(). However, now with my manual allocation of the mbuf and packet data, it's not working. What am I missing here? I've verified the IP and MAC addresses multiple times, they work with basic forwarder, with mbuf returned by rx_burst.