Hi, > I don't find the 'direct' mode in latest Linux. Do you have a plan to > do this in Linux? No, I was not planning on any Linux kernel contributions. > IIUC, the 'direct' mode just to let the packets from one port send out > at the same port, right? > If it is, why do you introduce this new mode? It doesn't seem to have > anything to do with bonding. Indeed, the base functionality is to forward packets to the same port they were received on. The main use-case is to allow applications to use one aggregated (bonding) port while ensuring that packet forwarding is always the same, i.e. each packet leaves through the member port it arrived on. I find it a lot simpler to use a bonding port whenever I have an application that uses multiple ports. It means I don't have to configure, start and stop all the ports individually; the bonding PMD is a perfect abstraction for that. Kamil On 7/10/24 05:49, lihuisong (C) wrote: > Hi Kamil, > > 在 2024/4/12 20:27, Kamil Vojanec 写道: >> This patch adds a new bonding mode: 'direct'. The purpose of this mode >> is to redirect output packets to the same member port on which they were >> received. In case the requested port is not among the member ports, >> round robin TX mode is used as a fallback. > I don't find the 'direct' mode in latest Linux. Do you have a plan to > do this in Linux? > In addition, all the bonding mode have been supported in DPDK. > I am not sure if it is ok for us to add any new mode for our application. > > IIUC, the 'direct' mode just to let the packets from one port send out > at the same port, right? > If it is, why do you introduce this new mode? It doesn't seem to have > anything to do with bonding. > Application also can very simply do that anyway. 
> > /Huisong >> >> Signed-off-by: Kamil Vojanec >> --- >>   app/test/test_link_bonding.c            | 205 ++++++++++++++++++++++++ >>   drivers/net/bonding/rte_eth_bond.h      |   5 + >>   drivers/net/bonding/rte_eth_bond_args.c |   1 + >>   drivers/net/bonding/rte_eth_bond_pmd.c  |  50 +++++- >>   4 files changed, 258 insertions(+), 3 deletions(-) >> >> diff --git a/app/test/test_link_bonding.c b/app/test/test_link_bonding.c >> index 4d54706c21..254cacf5aa 100644 >> --- a/app/test/test_link_bonding.c >> +++ b/app/test/test_link_bonding.c >> @@ -2134,6 +2134,208 @@ >> test_roundrobin_verify_polling_member_link_status_change(void) >>       return remove_members_and_stop_bonding_device(); >>   } >>   +/** Direct mode Tests */ >> + >> +static int >> +test_direct_tx_burst_single_member(void) >> +{ >> +    unsigned int i; >> +    int member_port_id; >> +    struct rte_eth_stats port_stats; >> +    struct rte_mbuf *pkts[MAX_PKT_BURST]; >> +    const unsigned int burst_size = 20; >> + >> +    TEST_ASSERT(burst_size <= MAX_PKT_BURST, >> +            "Burst size specified is greater than supported."); >> + >> +    TEST_ASSERT_SUCCESS(initialize_bonding_device_with_members( >> +            BONDING_MODE_DIRECT, 0, 1, 1), >> +            "Failed to initialize bonding device with single member"); >> + >> +    /* Generate burst of test packets */ >> +    TEST_ASSERT_EQUAL(generate_test_burst(pkts, burst_size, 0, 1, 0, >> 0, 0), >> +            (int) burst_size, "Failed to generate test burst"); >> + >> +    member_port_id = test_params->member_port_ids[0]; >> + >> +    /* Set the 'port' mbuf attribute to the appropriate value */ >> +    for (i = 0; i < burst_size; i++) >> +        pkts[i]->port = member_port_id; >> + >> +    /* Send burst on bonding port */ >> +    TEST_ASSERT_EQUAL(rte_eth_tx_burst( >> +            test_params->bonding_port_id, 0, pkts, burst_size), >> +            burst_size, >> +            "TX burst failed"); >> + >> +    /* Verify stats on bonding port */ 
>> +    rte_eth_stats_get(test_params->bonding_port_id, &port_stats); >> +    TEST_ASSERT_EQUAL(port_stats.opackets, burst_size, >> +            "Bonding port (%d) opackets value (%u) not as expected >> (%u)\n", >> +            test_params->bonding_port_id, (unsigned int) >> port_stats.opackets, >> +            burst_size); >> + >> +    /* Verify stats on member port */ >> +    rte_eth_stats_get(member_port_id, &port_stats); >> +    TEST_ASSERT_EQUAL(port_stats.opackets, burst_size, >> +            "Member port (%d) opackets value (%u) not as expected >> (%u)\n", >> +            member_port_id, (unsigned int) port_stats.opackets, >> +            burst_size); >> + >> +    /* Put all members down and try to transmit */ >> + virtual_ethdev_simulate_link_status_interrupt(member_port_id, 0); >> + >> +    /* Try to send burst on bonding port */ >> + TEST_ASSERT_EQUAL(rte_eth_tx_burst(test_params->bonding_port_id, 0, >> +            pkts, burst_size), 0, >> +            "TX burst returned unexpected value"); >> + >> +    /* Clean up and remove members from bonding device */ >> +    return remove_members_and_stop_bonding_device(); >> +} >> + >> +static int >> +test_direct_tx_burst_multiple_members_single_tx(void) >> +{ >> +    unsigned int i; >> +    struct rte_eth_stats port_stats; >> +    struct rte_mbuf *pkts[MAX_PKT_BURST]; >> +    const unsigned int num_members = 4; >> +    const unsigned int burst_size = 20; >> +    const int exp_member_port_id = test_params->member_port_ids[0]; >> +    int cmember_port_id; >> +    unsigned int expected_pkts; >> + >> + >> +    TEST_ASSERT(burst_size <= MAX_PKT_BURST, >> +            "Burst size specified is greater than supported."); >> + >> +    TEST_ASSERT_SUCCESS(initialize_bonding_device_with_members( >> +            BONDING_MODE_DIRECT, 0, num_members, 1), >> +            "Failed to initialize bonding device with single member"); >> + >> +    /* Generate burst of test packets */ >> +    TEST_ASSERT_EQUAL(generate_test_burst( >> 
+            pkts, burst_size, 0, 1, 0, 0, 0), >> +            (int) burst_size, >> +            "Failed to generate test burst"); >> + >> + >> +    /* Set the 'port' mbuf attribute to the appropriate value */ >> +    for (i = 0; i < burst_size; i++) >> +        pkts[i]->port = exp_member_port_id; >> + >> +    /* Send burst on bonding port */ >> +    TEST_ASSERT_EQUAL(rte_eth_tx_burst( >> +            test_params->bonding_port_id, 0, pkts, burst_size), >> +            burst_size, >> +            "TX burst failed"); >> + >> +    /* Verify stats on bonding port */ >> +    rte_eth_stats_get(test_params->bonding_port_id, &port_stats); >> +    TEST_ASSERT_EQUAL(port_stats.opackets, burst_size, >> +            "Bonding port (%d) opackets value (%u) not as expected >> (%u)\n", >> +            test_params->bonding_port_id, (unsigned int) >> port_stats.opackets, >> +            burst_size); >> + >> +    /* Verify member ports tx stats */ >> +    for (i = 0; i < test_params->bonding_member_count; i++) { >> +        cmember_port_id = test_params->member_port_ids[i]; >> + >> +        if (cmember_port_id == exp_member_port_id) >> +            expected_pkts = burst_size; >> +        else >> +            expected_pkts = 0; >> + >> +        rte_eth_stats_get(cmember_port_id, &port_stats); >> +        TEST_ASSERT_EQUAL(port_stats.opackets, >> +                (uint64_t)expected_pkts, >> +                "Member Port (%d) opackets value (%u) not as >> expected (%u)\n", >> +                test_params->bonding_port_id, (unsigned >> int)port_stats.opackets, >> +                expected_pkts); >> +    } >> + >> +    /* Put all members down and try and transmit */ >> +    for (i = 0; i < test_params->bonding_member_count; i++) { >> +        virtual_ethdev_simulate_link_status_interrupt( >> +                test_params->member_port_ids[i], 0); >> +    } >> + >> +    /* Try to send burst on bonding port */ >> + TEST_ASSERT_EQUAL(rte_eth_tx_burst(test_params->bonding_port_id, 0, >> +       
     pkts, burst_size), 0, >> +            "TX burst returned unexpected value"); >> + >> +    /* Clean up and remove members from bonding device */ >> +    return remove_members_and_stop_bonding_device(); >> +} >> + >> +static int >> +test_direct_tx_burst_multiple_members_rr_fallback(void) >> +{ >> +    unsigned int i; >> +    struct rte_eth_stats port_stats; >> +    struct rte_mbuf *pkts[MAX_PKT_BURST]; >> +    const unsigned int num_members = 4; >> +    const unsigned int pkts_per_member = 20; >> +    const unsigned int burst_size = num_members * pkts_per_member; >> + >> + >> +    TEST_ASSERT(burst_size <= MAX_PKT_BURST, >> +            "Burst size specified is greater than supported."); >> + >> +    TEST_ASSERT_SUCCESS(initialize_bonding_device_with_members( >> +            BONDING_MODE_DIRECT, 0, num_members, 1), >> +            "Failed to initialize bonding device with single member"); >> + >> +    /* Generate burst of test packets */ >> +    TEST_ASSERT_EQUAL(generate_test_burst( >> +            pkts, burst_size, 0, 1, 0, 0, 0), >> +            (int) burst_size, >> +            "Failed to generate test burst"); >> + >> + >> +    /* Set the 'port' mbuf attribute to the appropriate value */ >> +    for (i = 0; i < burst_size; i++) >> +        pkts[i]->port = RTE_MAX_ETHPORTS; >> + >> +    /* Send burst on bonding port */ >> +    TEST_ASSERT_EQUAL(rte_eth_tx_burst( >> +            test_params->bonding_port_id, 0, pkts, burst_size), >> +            burst_size, >> +            "TX burst failed"); >> + >> +    /* Verify stats on bonding port */ >> +    rte_eth_stats_get(test_params->bonding_port_id, &port_stats); >> +    TEST_ASSERT_EQUAL(port_stats.opackets, burst_size, >> +            "Bonding port (%d) opackets value (%u) not as expected >> (%u)\n", >> +            test_params->bonding_port_id, (unsigned int) >> port_stats.opackets, >> +            burst_size); >> + >> +    /* Verify member ports tx stats */ >> +    for (i = 0; i < 
test_params->bonding_member_count; i++) { >> +        rte_eth_stats_get(test_params->member_port_ids[i], >> &port_stats); >> +        TEST_ASSERT_EQUAL(port_stats.opackets, >> +                (uint64_t)pkts_per_member, >> +                "Member Port (%d) opackets value (%u) not as >> expected (%u)\n", >> +                test_params->bonding_port_id, (unsigned >> int)port_stats.opackets, >> +                pkts_per_member); >> +    } >> + >> +    /* Put all members down and try and transmit */ >> +    for (i = 0; i < test_params->bonding_member_count; i++) { >> +        virtual_ethdev_simulate_link_status_interrupt( >> +                test_params->member_port_ids[i], 0); >> +    } >> + >> +    /* Try to send burst on bonding port */ >> + TEST_ASSERT_EQUAL(rte_eth_tx_burst(test_params->bonding_port_id, 0, >> +            pkts, burst_size), 0, >> +            "TX burst returned unexpected value"); >> + >> +    /* Clean up and remove members from bonding device */ >> +    return remove_members_and_stop_bonding_device(); >> +} >>     /** Active Backup Mode Tests */ >>   @@ -5174,6 +5376,9 @@ static struct unit_test_suite >> link_bonding_test_suite  = { >>           TEST_CASE(test_roundrobin_verify_mac_assignment), >> TEST_CASE(test_roundrobin_verify_member_link_status_change_behaviour), >> TEST_CASE(test_roundrobin_verify_polling_member_link_status_change), >> +        TEST_CASE(test_direct_tx_burst_single_member), >> + TEST_CASE(test_direct_tx_burst_multiple_members_single_tx), >> + TEST_CASE(test_direct_tx_burst_multiple_members_rr_fallback), >>           TEST_CASE(test_activebackup_tx_burst), >>           TEST_CASE(test_activebackup_rx_burst), >> TEST_CASE(test_activebackup_verify_promiscuous_enable_disable), >> diff --git a/drivers/net/bonding/rte_eth_bond.h >> b/drivers/net/bonding/rte_eth_bond.h >> index f10165f2c6..82d6644500 100644 >> --- a/drivers/net/bonding/rte_eth_bond.h >> +++ b/drivers/net/bonding/rte_eth_bond.h >> @@ -83,6 +83,11 @@ extern "C" { >>  
  * information from it. When ARP reply from that peer is received, >> its MAC is >>    * stored, one of member MACs assigned and ARP reply send to that >> peer. >>    */ >> +#define BONDING_MODE_DIRECT    (7) >> +/**< Direct bonding mode (Mode 7) >> + * In this mode, all packets are transmitted using the same member >> port on which >> + * it was received. This is decided based on the 'port' field of >> 'rte_mbuf'. >> + */ >>     /* Balance Mode Transmit Policies */ >>   #define BALANCE_XMIT_POLICY_LAYER2        (0) >> diff --git a/drivers/net/bonding/rte_eth_bond_args.c >> b/drivers/net/bonding/rte_eth_bond_args.c >> index bdec5d61d4..44e20568db 100644 >> --- a/drivers/net/bonding/rte_eth_bond_args.c >> +++ b/drivers/net/bonding/rte_eth_bond_args.c >> @@ -158,6 +158,7 @@ bond_ethdev_parse_member_mode_kvarg(const char >> *key __rte_unused, >>       case BONDING_MODE_8023AD: >>       case BONDING_MODE_TLB: >>       case BONDING_MODE_ALB: >> +    case BONDING_MODE_DIRECT: >>           return 0; >>       default: >>           RTE_BOND_LOG(ERR, "Invalid member mode value (%s) >> specified", value); >> diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c >> b/drivers/net/bonding/rte_eth_bond_pmd.c >> index c40d18d128..57ac5879c4 100644 >> --- a/drivers/net/bonding/rte_eth_bond_pmd.c >> +++ b/drivers/net/bonding/rte_eth_bond_pmd.c >> @@ -578,9 +578,14 @@ bond_ethdev_rx_burst_alb(void *queue, struct >> rte_mbuf **bufs, uint16_t nb_pkts) >>       return nb_recv_pkts; >>   } >>   +enum tx_member_populate_mode { >> +    RR_ONLY, >> +    DIRECT_WITH_RR_FALLBACK, >> +}; >> + >>   static uint16_t >> -bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs, >> -        uint16_t nb_pkts) >> +bond_ethdev_tx_burst_rr_generic(void *queue, struct rte_mbuf **bufs, >> +        uint16_t nb_pkts, enum tx_member_populate_mode pop_mode) >>   { >>       struct bond_dev_private *internals; >>       struct bond_tx_queue *bd_tx_q; >> @@ -594,7 +599,7 @@ 
bond_ethdev_tx_burst_round_robin(void *queue, >> struct rte_mbuf **bufs, >>       uint16_t num_tx_total = 0, num_tx_member; >>         static int member_idx; >> -    int i, cmember_idx = 0, tx_fail_total = 0; >> +    int i, j, cmember_idx = 0, tx_fail_total = 0; >>         bd_tx_q = (struct bond_tx_queue *)queue; >>       internals = bd_tx_q->dev_private; >> @@ -611,6 +616,17 @@ bond_ethdev_tx_burst_round_robin(void *queue, >> struct rte_mbuf **bufs, >>       /* Populate members mbuf with which packets are to be sent on >> it  */ >>       for (i = 0; i < nb_pkts; i++) { >>           cmember_idx = (member_idx + i) % num_of_members; >> + >> +        if (pop_mode == DIRECT_WITH_RR_FALLBACK) { >> +            /* Try to find correct member index */ >> +            for (j = 0; j < num_of_members; j++) { >> +                if (bufs[i]->port == members[j]) { >> +                    cmember_idx = j; >> +                    break; >> +                } >> +            } >> +        } >> + >> member_bufs[cmember_idx][(member_nb_pkts[cmember_idx])++] = bufs[i]; >>       } >>   @@ -646,6 +662,20 @@ bond_ethdev_tx_burst_round_robin(void *queue, >> struct rte_mbuf **bufs, >>       return num_tx_total; >>   } >>   +static uint16_t >> +bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs, >> +        uint16_t nb_pkts) >> +{ >> +    return bond_ethdev_tx_burst_rr_generic(queue, bufs, nb_pkts, >> RR_ONLY); >> +} >> + >> +static uint16_t >> +bond_ethdev_tx_burst_direct(void *queue, struct rte_mbuf **bufs, >> +        uint16_t nb_pkts) >> +{ >> +    return bond_ethdev_tx_burst_rr_generic(queue, bufs, nb_pkts, >> DIRECT_WITH_RR_FALLBACK); >> +} >> + >>   static uint16_t >>   bond_ethdev_tx_burst_active_backup(void *queue, >>           struct rte_mbuf **bufs, uint16_t nb_pkts) >> @@ -1551,6 +1581,7 @@ mac_address_members_update(struct rte_eth_dev >> *bonding_eth_dev) >>       case BONDING_MODE_ROUND_ROBIN: >>       case BONDING_MODE_BALANCE: >>       case 
BONDING_MODE_BROADCAST: >> +    case BONDING_MODE_DIRECT: >>           for (i = 0; i < internals->member_count; i++) { >>               if (rte_eth_dev_default_mac_addr_set( >>                       internals->members[i].port_id, >> @@ -1648,6 +1679,10 @@ bond_ethdev_mode_set(struct rte_eth_dev >> *eth_dev, uint8_t mode) >>           eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb; >>           eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb; >>           break; >> +    case BONDING_MODE_DIRECT: >> +        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_direct; >> +        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; >> +        break; >>       default: >>           return -1; >>       } >> @@ -2581,6 +2616,7 @@ bond_ethdev_link_update(struct rte_eth_dev >> *ethdev, int wait_to_complete) >>       case BONDING_MODE_BALANCE: >>       case BONDING_MODE_TLB: >>       case BONDING_MODE_ALB: >> +    case BONDING_MODE_DIRECT: >>       default: >>           /** >>            * In theses mode the maximum theoretical link speed is the >> sum >> @@ -2678,6 +2714,7 @@ bond_ethdev_promiscuous_enable(struct >> rte_eth_dev *eth_dev) >>       case BONDING_MODE_ROUND_ROBIN: >>       case BONDING_MODE_BALANCE: >>       case BONDING_MODE_BROADCAST: >> +    case BONDING_MODE_DIRECT: >>       case BONDING_MODE_8023AD: { >>           unsigned int member_ok = 0; >>   @@ -2732,6 +2769,7 @@ bond_ethdev_promiscuous_disable(struct >> rte_eth_dev *dev) >>       case BONDING_MODE_ROUND_ROBIN: >>       case BONDING_MODE_BALANCE: >>       case BONDING_MODE_BROADCAST: >> +    case BONDING_MODE_DIRECT: >>       case BONDING_MODE_8023AD: { >>           unsigned int member_ok = 0; >>   @@ -2790,6 +2828,7 @@ bond_ethdev_promiscuous_update(struct >> rte_eth_dev *dev) >>       case BONDING_MODE_BALANCE: >>       case BONDING_MODE_BROADCAST: >>       case BONDING_MODE_8023AD: >> +    case BONDING_MODE_DIRECT: >>           /* As promiscuous mode is propagated to all members for these >>            * 
mode, no need to update for bonding device. >>            */ >> @@ -2825,6 +2864,7 @@ bond_ethdev_allmulticast_enable(struct >> rte_eth_dev *eth_dev) >>       case BONDING_MODE_ROUND_ROBIN: >>       case BONDING_MODE_BALANCE: >>       case BONDING_MODE_BROADCAST: >> +    case BONDING_MODE_DIRECT: >>       case BONDING_MODE_8023AD: { >>           unsigned int member_ok = 0; >>   @@ -2879,6 +2919,7 @@ bond_ethdev_allmulticast_disable(struct >> rte_eth_dev *eth_dev) >>       case BONDING_MODE_ROUND_ROBIN: >>       case BONDING_MODE_BALANCE: >>       case BONDING_MODE_BROADCAST: >> +    case BONDING_MODE_DIRECT: >>       case BONDING_MODE_8023AD: { >>           unsigned int member_ok = 0; >>   @@ -2936,6 +2977,7 @@ bond_ethdev_allmulticast_update(struct >> rte_eth_dev *dev) >>       case BONDING_MODE_BALANCE: >>       case BONDING_MODE_BROADCAST: >>       case BONDING_MODE_8023AD: >> +    case BONDING_MODE_DIRECT: >>           /* As allmulticast mode is propagated to all members for these >>            * mode, no need to update for bonding device. >>            */ >> @@ -3365,6 +3407,8 @@ bond_mode_name(uint8_t mode) >>           return "TLB"; >>       case BONDING_MODE_ALB: >>           return "ALB"; >> +    case BONDING_MODE_DIRECT: >> +        return "DIRECT"; >>       default: >>           return "Unknown"; >>       }