DPDK patches and discussions
 help / color / mirror / Atom feed
* Please help me send packets in a newly allocated mbuf.
@ 2025-02-06 14:18 Kevin Stefanov
  0 siblings, 0 replies; only message in thread
From: Kevin Stefanov @ 2025-02-06 14:18 UTC (permalink / raw)
  To: dev

[-- Attachment #1: Type: text/plain, Size: 9281 bytes --]

Hello, I'm trying to use DPDK for a kernel bypass networking solution at my
new job. I already managed to get it to forward packets that I sent it from
a remote machine, using the basic forwarding example app, but now I'm
trying to craft my own packets from scratch and get tx_burst() to send them
to my remote machine.

I stayed up until 5 AM last night trying to get it working, but I couldn't.
Here is my function that takes a pointer to an mbuf pointer, allocates an
mbuf for it from the initialized memory pool, fills out the Ethernet, IP and
UDP headers, and sets the offload flags in the mbuf so that IP and UDP
checksum calculation is offloaded to the network card:


void allocate_and_populate_packet_mbuf( struct rte_mbuf**   buf_ptr
                                       ,struct rte_mempool* mempool)
{
    const uint16_t PAYLOAD_LEN = 128;
    uint8_t payload[PAYLOAD_LEN];
    const uint16_t PACKET_LEN =  sizeof(struct rte_ether_hdr)
                               + sizeof(struct rte_ipv4_hdr)
                               + sizeof(struct rte_udp_hdr)
                               + PAYLOAD_LEN
                               ;

    struct rte_ether_hdr* ether_hdr;
    struct rte_ipv4_hdr*  ipv4_hdr;
    struct rte_udp_hdr*   udp_hdr;

    char* data;

    uint8_t* payload_addr;

    *((uint64_t*)(payload + 0)) = PACKET_ID;
    memset(payload + 8, 0xF0, PAYLOAD_LEN - 8);

    *buf_ptr = rte_pktmbuf_alloc(mempool);

    (*buf_ptr)->data_len = PACKET_LEN;
    (*buf_ptr)->pkt_len  = PACKET_LEN;

    ether_hdr = rte_pktmbuf_mtod(*buf_ptr, struct rte_ether_hdr *);

    memset(ether_hdr, 0x00, sizeof(struct rte_ether_hdr));

    /*
     * Destination MAC Address       = 0A:40:46:FB:8A:A9
     * Source MAC Address (this NIC) = 0A:C9:6E:32:4D:49
     */

    ether_hdr->dst_addr.addr_bytes[0] = 0x0A;
    ether_hdr->dst_addr.addr_bytes[1] = 0x40;
    ether_hdr->dst_addr.addr_bytes[2] = 0x46;
    ether_hdr->dst_addr.addr_bytes[3] = 0xFB;
    ether_hdr->dst_addr.addr_bytes[4] = 0x8A;
    ether_hdr->dst_addr.addr_bytes[5] = 0xA9;

    ether_hdr->src_addr.addr_bytes[0] = 0x0A;
    ether_hdr->src_addr.addr_bytes[1] = 0xC9;
    ether_hdr->src_addr.addr_bytes[2] = 0x6E;
    ether_hdr->src_addr.addr_bytes[3] = 0x32;
    ether_hdr->src_addr.addr_bytes[4] = 0x4D;
    ether_hdr->src_addr.addr_bytes[5] = 0x49;

    ether_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);

    ipv4_hdr = rte_pktmbuf_mtod_offset(*buf_ptr, struct rte_ipv4_hdr *,
sizeof(struct rte_ether_hdr));

    memset(ipv4_hdr, 0x00, sizeof(struct rte_ipv4_hdr));

    ipv4_hdr->total_length = rte_cpu_to_be_16(  sizeof(struct rte_ipv4_hdr)
                                              + sizeof(struct rte_udp_hdr)
                                              + PAYLOAD_LEN
                                             );

    ipv4_hdr->time_to_live    = 255;
    ipv4_hdr->next_proto_id = IPPROTO_UDP;
    ipv4_hdr->src_addr      = inet_addr("172.31.32.59");
    ipv4_hdr->dst_addr      = inet_addr("37.63.34.53");
    ipv4_hdr->version_ihl   = 69;
    ipv4_hdr->ihl                = 5;
    ipv4_hdr->version       = 4;

    udp_hdr = rte_pktmbuf_mtod_offset( *buf_ptr
                                      ,struct rte_udp_hdr *
                                      ,sizeof(struct rte_ether_hdr) +
sizeof(struct rte_ipv4_hdr)
                                     );

    udp_hdr->src_port  = rte_cpu_to_be_16(54749);
    udp_hdr->dst_port  = rte_cpu_to_be_16(38686);
    udp_hdr->dgram_len = rte_cpu_to_be_16(sizeof(struct rte_udp_hdr) +
PAYLOAD_LEN);

    /* Also make sure we offload IP checksum and UDP checksum
     * calculation to our network card instead of manually doing it.
     */
     /* This is how the web wiki page says to do it. */
    (*buf_ptr)->l2_len = sizeof(struct rte_ether_hdr);

    (*buf_ptr)->l3_len = sizeof(struct rte_ipv4_hdr);


    (*buf_ptr)->ol_flags |=   RTE_MBUF_F_TX_IPV4
                            | RTE_MBUF_F_TX_IP_CKSUM
                            | RTE_MBUF_F_TX_UDP_CKSUM;


    //(*buf_ptr)->ol_flags = 384;

    (*buf_ptr)->port = 0;
    (*buf_ptr)->packet_type = 528;
    (*buf_ptr)->l2_type = 0;
    (*buf_ptr)->l3_type = 1;
    (*buf_ptr)->l4_type = 2;

    udp_hdr->dgram_cksum = rte_ipv4_phdr_cksum(ipv4_hdr,
(*buf_ptr)->ol_flags);

     /* I don't understand whether I need this call or not. */
     /* I can see my payload and headers already populated */
     /* by the dumped mbuf, even without this call. */
    //data = rte_pktmbuf_append(*buf_ptr, PAYLOAD_LEN);
    /*
    if(data == NULL){
        printf("\n\n[ERR] DPDK: rte_pktmbuf_append(*buf_ptr, PAYLOAD_LEN)
failed!\n\n");
        printf("[ERR] Can't construct packet message buffer!
Returning.\n\n");
        return;
    }
    */

    rte_memcpy(udp_hdr + sizeof(struct rte_udp_hdr), payload, PAYLOAD_LEN);

    return;
}

After this function returns, I call the prepare function
(rte_eth_tx_prepare()) on the packet, because the web wiki says that this is
the function that looks at the offload flags of the mbuf and calculates the
offloaded checksums. (However, when I print the IPv4 header checksum after
the prepare() call, it is still 0 for some reason. Is that normal?)

/*
 * The lcore main. This is the main thread that does the work.
 * It builds packets with a fixed-pattern 128-byte payload (the first
 * 8 bytes store a constant packet ID so the other side recognizes us)
 * and continuously transmits them, 1 packet at a time, on TX queue 0
 * of every available port. Never returns.
 */

 /* Basic sending application lcore. 8< */
static __rte_noreturn void
lcore_main(void)
{
        /* Burst size is 1 for now: one packet per port per iteration. */
        const uint16_t nb_pkts = 1;
        uint16_t port;
        uint16_t ready_packets = 0;

        /*
         * Check that the port is on the same NUMA node as the polling
         * thread for best performance.
         */
        RTE_ETH_FOREACH_DEV(port) {
                if (rte_eth_dev_socket_id(port) >= 0 &&
                                rte_eth_dev_socket_id(port) !=
                                                (int)rte_socket_id()) {
                        printf("WARNING, port %u is on remote NUMA node to "
                                        "polling thread.\n\tPerformance will "
                                        "not be optimal.\n", port);
                }
        }

        printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
                        rte_lcore_id());

        /* Main work of application loop. 8< */
        for (;;) {

                /* Go over each NIC connected to a DPDK driver. */
                /* Transmit 1 packet from each NIC. */
                /* For now, just 1 NIC is connected, port id = 0. */
                RTE_ETH_FOREACH_DEV(port) {

                        /*
                         * Local array of POINTERS to mbuf objects. Each
                         * pointer comes from rte_pktmbuf_alloc(mempool),
                         * called once per packet to send. Only then can
                         * the packet's headers and payload be populated.
                         */
                        struct rte_mbuf *bufs[BURST_SIZE];

                        allocate_and_populate_packet_mbuf(&(bufs[0]),
                                                          mbuf_pool);

                        /* No mbuf available: nothing to send this round. */
                        if (unlikely(bufs[0] == NULL)) {
                                printf("Warning: could not allocate an mbuf, "
                                       "skipping this packet.\n");
                                continue;
                        }

                        /*
                         * Check packet correctness and prepare offloaded
                         * checksums.
                         */
                        ready_packets = rte_eth_tx_prepare(port, 0, bufs,
                                                           nb_pkts);

                        printf("\n\nNumber of packets checked, updated, "
                               "ready to send: %u\n", ready_packets);

                        /*
                         * A packet rejected by prepare must not be handed
                         * to tx_burst; free it so the pool is not drained.
                         */
                        if (unlikely(ready_packets < nb_pkts)) {
                                printf("Warning: packet rejected by "
                                       "rte_eth_tx_prepare(), dropping it.\n");
                                rte_pktmbuf_free(bufs[0]);
                                continue;
                        }

                        /* Dump the packet for inspection (stdout here). */
                        rte_pktmbuf_dump(stdout, bufs[0], 256);

                        /* Send burst of TX packets from the same NIC. */
                        const uint16_t nb_tx =
                                rte_eth_tx_burst( port          /* NIC ID    */
                                                 ,0             /* TX queue  */
                                                 ,bufs          /* mbuf**    */
                                                 ,ready_packets /* to send   */
                                                );

                        /* Free any unsent packets. */
                        if (unlikely(nb_tx < ready_packets)) {
                                printf("Warning: Only %u packets "
                                       "transmitted!\n", nb_tx);
                                printf("Manually freeing the mbuf of each "
                                       "unsent packet.\n");
                                uint16_t buf;
                                for (buf = nb_tx; buf < ready_packets; buf++)
                                        rte_pktmbuf_free(bufs[buf]);
                        }
                }
        }
        /* >8 End of loop. */
}

I am using DPDK 23.11 here, with the AWS ENA poll mode driver, on Amazon
Linux 2023, with the igb_uio kernel module.
Again, I got DPDK forwarding to work just fine, but that was when I took
the mbuf given to me by rx_burst(), swapped the source and destination MAC
and IP addresses and UDP ports, and gave the mbuf to tx_burst().

However, now that I allocate the mbuf and build the packet data manually,
it's not working. What am I missing here?
I've verified the IP and MAC addresses multiple times; they work with the
basic forwarder, using the mbuf returned by rx_burst().

[-- Attachment #2: Type: text/html, Size: 11493 bytes --]

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2025-02-10  7:55 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-02-06 14:18 Please help me send packets in a newly allocated mbuf Kevin Stefanov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).