DPDK usage discussions
 help / color / mirror / Atom feed
* rss calculation as the nic
@ 2024-02-03 17:03 Yaron Illouz
  2024-02-14 16:07 ` Ori Kam
  0 siblings, 1 reply; 5+ messages in thread
From: Yaron Illouz @ 2024-02-03 17:03 UTC (permalink / raw)
  To: dev, 'users@dpdk.org'

[-- Attachment #1: Type: text/plain, Size: 4749 bytes --]

I am using mellanox Connectx6, dpdk 22
'MT2892 Family [ConnectX-6 Dx] 101d' if=ens5f1 drv=mlx5_core unused=igb_uio
I configure the port with multiple queues and split traffic according to ip+port.
I want to calculate the hash the same way the NIC does, so that I can load-balance traffic coming from another card, where the relevant information is inside the packet and not in the IP and transport layers. For this purpose I need to be able to compute the same hash value that the first NIC computes.
Here is the code I use to split traffic across the rx queues:
        /*
         * rte_flow rule setup: match ETH / IPV4 / GRE / ETH / IPV4 / UDP packets
         * on ingress and spread them over pi_nNumRxQueues Rx queues with an RSS
         * action. The enclosing function header is not part of this excerpt;
         * pi_nPort, pi_nNumRxQueues and pi_devInfo come from it.
         */
        const int MAX_PATTERN_IN_FLOW = 10;
        const int MAX_ACTIONS_IN_FLOW = 10;

        struct rte_flow_attr attr;

        struct rte_flow_item pattern[MAX_PATTERN_IN_FLOW];
        struct rte_flow_action actions[MAX_ACTIONS_IN_FLOW];
        struct rte_flow *flow;
        struct rte_flow_error error;

        /* Zero both arrays so every unused item/action field starts as 0/NULL. */
        memset(pattern, 0, sizeof(pattern));
        memset(actions, 0, sizeof(actions));

        /* Set the rule attribute: only ingress packets will be checked. */
        memset(&attr, 0, sizeof(struct rte_flow_attr));
        attr.ingress = 1;

        /*
         * Pattern items. Every spec is NULL, so only the header types are
         * listed here; no field values are constrained by this code.
         */
        /* Outer Ethernet header. */
        pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH;
        pattern[0].spec = NULL;

        /* Outer IPv4 header. */
        pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV4;
        pattern[1].spec = NULL;

        /* GRE header; the items that follow describe the encapsulated packet. */
        pattern[2].type = RTE_FLOW_ITEM_TYPE_GRE;
        pattern[2].spec = NULL;

        /* Inner Ethernet header. */
        pattern[3].type = RTE_FLOW_ITEM_TYPE_ETH;
        pattern[3].spec = NULL;

        /* Inner IPv4 header. */
        pattern[4].type = RTE_FLOW_ITEM_TYPE_IPV4;
        pattern[4].spec = NULL;

        /* Inner UDP header. */
        pattern[5].type = RTE_FLOW_ITEM_TYPE_UDP;
        pattern[5].spec = NULL;

        // end the pattern array
        pattern[6].type = RTE_FLOW_ITEM_TYPE_END;

        /* RSS action configuration; every field of rss_conf is assigned below. */
        struct rte_flow_action_rss rss_conf;
        // NOTE(review): runtime-sized array (VLA) - a compiler extension in C++.
        uint16_t queues[pi_nNumRxQueues];
        rss_conf.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
        /* Requested hash types, masked with pi_devInfo.flow_type_rss_offloads
         * (presumably the set of types the device reports as supported). */
        uint64_t hf = RTE_ETH_RSS_IP | RTE_ETH_RSS_TCP | RTE_ETH_RSS_UDP | RTE_ETH_RSS_SCTP;
        hf &= pi_devInfo.flow_type_rss_offloads;
        rss_conf.types = hf;
        rss_conf.queue_num = pi_nNumRxQueues;
        /* Identity mapping: entry i of the queue list is Rx queue i. */
        for (int nqQueueIndex= 0; nqQueueIndex < pi_nNumRxQueues; nqQueueIndex++)
                queues[nqQueueIndex] = nqQueueIndex;

        rss_conf.queue = queues;

        /* No explicit key is supplied (presumably the PMD then uses its default
         * key - confirm for the driver in use). */
        rss_conf.key_len = 0;
        rss_conf.key = NULL;
        /* NOTE(review): per the rte_flow RSS action documentation, level 2
         * requests hashing on the first inner (encapsulated) headers - confirm. */
        rss_conf.level = 2;

        // create the RSS action, followed by the end-of-list marker
        actions[0].type = RTE_FLOW_ACTION_TYPE_RSS;
        actions[0].conf = &rss_conf;
        actions[1].type = RTE_FLOW_ACTION_TYPE_END;

        // validate and create the flow rule
        if (rte_flow_validate(pi_nPort, &attr, pattern, actions, &error)==0)
        {
            flow = rte_flow_create(pi_nPort, &attr, pattern, actions, &error);
            if(flow){//success}
            else{//error}
        }
        else {error}

    }
And this is how I tried to reproduce the hash value saved in the mbuf, without success:
/*
 * 40-byte RSS hash key, listed eight bytes per row.
 * NOTE(review): presumably the key the NIC hashes with when the flow rule
 * supplies no key of its own - confirm against the PMD in use.
 */
uint8_t rss_hash_default_key[] = {
    0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
    0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
    0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
    0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
    0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a,
};

/*
 * Software recomputation of the RSS hash of an IPv4 packet, meant to be
 * compared with the hash the NIC stored in the mbuf.
 *
 * Returns the rte_softrss_be() result when the IP header reports version 4,
 * and 0 otherwise.
 *
 * NOTE(review): the parameter is named m, but the body reads pi_mbuf.
 * NOTE(review): IpHeader, offsetOfIp, transportLayerOffset and rss_key_be are
 * not defined in this excerpt.
 */
static inline uint32_t
do_softrss(struct rte_mbuf *m)
{
    uint32_t input_len;
    struct rte_ipv4_tuple ipv4_tuple;

    /* Start of the packet data, then the IP header at offsetOfIp bytes into it. */
    char * pRawPacket = static_cast<char*>(rte_pktmbuf_mtod(pi_mbuf, void* ));
    IpHeader * pIpHeader = (IpHeader *)(pRawPacket + offsetOfIp);
    if(pIpHeader->GetVersion()==4)
    {
        /* Addresses are converted from big-endian to CPU byte order... */
        ipv4_tuple.src_addr = rte_be_to_cpu_32(pIpHeader->dwSrcAddressBigEndian);
        ipv4_tuple.dst_addr = rte_be_to_cpu_32(pIpHeader->dwDstAddressBigEndian);
        /* ...while the ports are copied as stored in the packet, with no conversion.
         * NOTE(review): the tuple therefore mixes byte orders. The DPDK Toeplitz
         * hash library guide describes the tuple as host byte order for both
         * rte_softrss variants - confirm, and convert the ports as well. */
        ipv4_tuple.sport = *(uint16_t*)(pRawPacket + transportLayerOffset);
        ipv4_tuple.dport = *(uint16_t*)(pRawPacket + transportLayerOffset+2);
        /* NOTE(review): RTE_THASH_V4_L3_LEN presumably covers only the two
         * addresses, so the ports set above would not be hashed;
         * RTE_THASH_V4_L4_LEN looks like the intended length - confirm. */
        input_len = RTE_THASH_V4_L3_LEN;
        /* NOTE(review): rte_softrss_be() is documented to need a key prepared
         * with rte_convert_rss_key(); verify how rss_key_be is produced. */
        return rte_softrss_be((uint32_t *)&ipv4_tuple, input_len, rss_key_be);

    }
    /* Not IPv4: no hash is computed. */
    return 0;
}
// Print the hash stored in the mbuf, then the software-computed hash, both in hex.
new_rss = do_softrss(mbuf_pointer);
std::cout<< std::hex << mbuf_pointer->hash.rss << " -> " << new_rss << std::dec << std::endl;

And I get a different value from mbuf_pointer->hash.rss (mbuf hash -> software hash):
5ed28a5c -> 33eb33eb
974c1896 -> 24e224e2
1edf1638 -> 21752175
8a54c19 -> 80638063
459a6f76 -> 1b351b35
1cdf1d1c -> e53be53b


****  I understand it is possible to do it, but i don't get the same value  ***
ethtool -i ens5f0
driver: mlx5_core
version: 5.8-3.0.7
firmware-version: 22.32.2004 (MT_0000000437)
expansion-rom-version:
bus-info: 0000:83:00.0
supports-statistics: yes
supports-test: yes
supports-eeprom-access: no
supports-register-dump: no
supports-priv-flags: yes


[-- Attachment #2: Type: text/html, Size: 42065 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: rss calculation as the nic
  2024-02-03 17:03 rss calculation as the nic Yaron Illouz
@ 2024-02-14 16:07 ` Ori Kam
  2024-02-21 17:10   ` Yaron Illouz
  0 siblings, 1 reply; 5+ messages in thread
From: Ori Kam @ 2024-02-14 16:07 UTC (permalink / raw)
  To: Yaron Illouz, dev, 'users@dpdk.org'

[-- Attachment #1: Type: text/plain, Size: 1963 bytes --]

Hi Yaron,

I can see some possible issues with your code, please see below.

From: Yaron Illouz <yaroni@radcom.com>
Sent: Saturday, February 3, 2024 7:03 PM
To: dev@dpdk.org; 'users@dpdk.org' <users@dpdk.org>
Subject: rss calculation as the nic

[Snip]

static inline uint32_t
do_softrss(struct rte_mbuf *m)
{
    uint32_t input_len;
    struct rte_ipv4_tuple ipv4_tuple;

    char * pRawPacket = static_cast<char*>(rte_pktmbuf_mtod(pi_mbuf, void* ));
    IpHeader * pIpHeader = (IpHeader *)(pRawPacket + offsetOfIp);
    if(pIpHeader->GetVersion()==4)
    {
        ipv4_tuple.src_addr = rte_be_to_cpu_32(pIpHeader->dwSrcAddressBigEndian);
        ipv4_tuple.dst_addr = rte_be_to_cpu_32(pIpHeader->dwDstAddressBigEndian);
[Ori] here you are converting BE to CPU
        ipv4_tuple.sport = *(uint16_t*)(pRawPacket + transportLayerOffset);
        ipv4_tuple.dport = *(uint16_t*)(pRawPacket + transportLayerOffset+2);
[Ori] here you keep the values as BE
        input_len = RTE_THASH_V4_L3_LEN;
        return rte_softrss_be((uint32_t *)&ipv4_tuple, input_len, rss_key_be);
[Ori] the above function expects to get everything in BE and  you should also call rte_convert_rss_key before the use of the above function.
    }
    return 0;
}
new_rss = do_softrss(mbuf_pointer);
std::cout<< std::hex << mbuf_pointer->hash.rss << " -> " << new_rss << std::dec << std::endl;

And i get a different value than the mbuf_pointer->hash.rss
5ed28a5c -> 33eb33eb
974c1896 -> 24e224e2
1edf1638 -> 21752175
8a54c19 -> 80638063
459a6f76 -> 1b351b35
1cdf1d1c -> e53be53b


****  I understand it is possible to do it, but i don't get the same value  ***
ethtool -i ens5f0
driver: mlx5_core
version: 5.8-3.0.7
firmware-version: 22.32.2004 (MT_0000000437)
expansion-rom-version:
bus-info: 0000:83:00.0
supports-statistics: yes
supports-test: yes
supports-eeprom-access: no
supports-register-dump: no
supports-priv-flags: yes


[-- Attachment #2: Type: text/html, Size: 16155 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: rss calculation as the nic
  2024-02-14 16:07 ` Ori Kam
@ 2024-02-21 17:10   ` Yaron Illouz
  2024-02-22  6:49     ` Pavel Vazharov
  0 siblings, 1 reply; 5+ messages in thread
From: Yaron Illouz @ 2024-02-21 17:10 UTC (permalink / raw)
  To: Ori Kam, dev, 'users@dpdk.org'

[-- Attachment #1: Type: text/plain, Size: 2688 bytes --]

Hi Ori

Thank you for your answer; it helped me.

However, I can see that the load balancing does not send both directions of a flow to the same instance.
I would like both directions of an ip + port pair to arrive at the same instance.

Currently ip1+port1 -> ip2+port2 arrives at instance x, while ip2+port2 -> ip1+port1 arrives at another instance.

How can I solve this?

From: Ori Kam <orika@nvidia.com>
Sent: Wednesday, February 14, 2024 6:07 PM
To: Yaron Illouz <yaroni@radcom.com>; dev@dpdk.org; 'users@dpdk.org' <users@dpdk.org>
Subject: RE: rss calculation as the nic

EXTERNAL EMAIL: Do not click links or attachments unless you recognize the sender and know the content is safe

Hi Yaron,

I can see some possible issues with your code, please see below.

From: Yaron Illouz <yaroni@radcom.com<mailto:yaroni@radcom.com>>
Sent: Saturday, February 3, 2024 7:03 PM
To: dev@dpdk.org<mailto:dev@dpdk.org>; 'users@dpdk.org' <users@dpdk.org<mailto:users@dpdk.org>>
Subject: rss calculation as the nic

[Snip]

static inline uint32_t
do_softrss(struct rte_mbuf *m)
{
    uint32_t input_len;
    struct rte_ipv4_tuple ipv4_tuple;

    char * pRawPacket = static_cast<char*>(rte_pktmbuf_mtod(pi_mbuf, void* ));
    IpHeader * pIpHeader = (IpHeader *)(pRawPacket + offsetOfIp);
    if(pIpHeader->GetVersion()==4)
    {
        ipv4_tuple.src_addr = rte_be_to_cpu_32(pIpHeader->dwSrcAddressBigEndian);
        ipv4_tuple.dst_addr = rte_be_to_cpu_32(pIpHeader->dwDstAddressBigEndian);
[Ori] here you are converting BE to CPU
        ipv4_tuple.sport = *(uint16_t*)(pRawPacket + transportLayerOffset);
        ipv4_tuple.dport = *(uint16_t*)(pRawPacket + transportLayerOffset+2);
[Ori] here you keep the values as BE
        input_len = RTE_THASH_V4_L3_LEN;
        return rte_softrss_be((uint32_t *)&ipv4_tuple, input_len, rss_key_be);
[Ori] the above function expects to get everything in BE and  you should also call rte_convert_rss_key before the use of the above function.
    }
    return 0;
}
new_rss = do_softrss(mbuf_pointer);
std::cout<< std::hex << mbuf_pointer->hash.rss << " -> " << new_rss << std::dec << std::endl;

And i get a different value than the mbuf_pointer->hash.rss
5ed28a5c -> 33eb33eb
974c1896 -> 24e224e2
1edf1638 -> 21752175
8a54c19 -> 80638063
459a6f76 -> 1b351b35
1cdf1d1c -> e53be53b


****  I understand it is possible to do it, but i don't get the same value  ***
ethtool -i ens5f0
driver: mlx5_core
version: 5.8-3.0.7
firmware-version: 22.32.2004 (MT_0000000437)
expansion-rom-version:
bus-info: 0000:83:00.0
supports-statistics: yes
supports-test: yes
supports-eeprom-access: no
supports-register-dump: no
supports-priv-flags: yes


[-- Attachment #2: Type: text/html, Size: 18138 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: rss calculation as the nic
  2024-02-21 17:10   ` Yaron Illouz
@ 2024-02-22  6:49     ` Pavel Vazharov
  2024-02-25  8:22       ` Ori Kam
  0 siblings, 1 reply; 5+ messages in thread
From: Pavel Vazharov @ 2024-02-22  6:49 UTC (permalink / raw)
  To: Yaron Illouz; +Cc: Ori Kam, dev, users

[-- Attachment #1: Type: text/plain, Size: 3447 bytes --]

On Wed, Feb 21, 2024 at 9:40 PM Yaron Illouz <yaroni@radcom.com> wrote:

> Hi Ori
>
> Thank you for your answer it helped me
>
> But I can see the load balancing is not done to the same instance for
> both directions
> I would like to have ip + port pairs arrive to same instance
>
> Now ip1+port1 -> ip2+port2 arrive to instance x and ip2+port2 -> ip1+port1
> arrive to another instance
>
> How can I solve this?
>
>
>
> *From:* Ori Kam <orika@nvidia.com>
> *Sent:* Wednesday, February 14, 2024 6:07 PM
> *To:* Yaron Illouz <yaroni@radcom.com>; dev@dpdk.org; 'users@dpdk.org' <
> users@dpdk.org>
> *Subject:* RE: rss calculation as the nic
>
>
>
> *EXTERNAL EMAIL:* Do not click links or attachments unless you recognize
> the sender and know the content is safe
>
>
>
> Hi Yaron,
>
>
>
> I can see some possible issues with your code, please see below.
>
If I understand your case correctly, you need to use a symmetric RSS key.
We use this one in production -
https://www.ndsl.kaist.edu/~kyoungsoo/papers/TR-symRSS.pdf
And you'll need to set it accordingly when configuring the device:
        port_conf.rx_adv_conf.rss_conf.rss_key     = rss_key;

        port_conf.rx_adv_conf.rss_conf.rss_key_len = rss_key_size;

HTH.


>
> *From:* Yaron Illouz <yaroni@radcom.com>
> *Sent:* Saturday, February 3, 2024 7:03 PM
> *To:* dev@dpdk.org; 'users@dpdk.org' <users@dpdk.org>
> *Subject:* rss calculation as the nic
>
>
>
> [Snip]
>
>
>
> static inline uint32_t
>
> do_softrss(struct rte_mbuf *m)
>
> {
>
>     uint32_t input_len;
>
>     struct rte_ipv4_tuple ipv4_tuple;
>
>
>
>     char * pRawPacket = static_cast<char*>(rte_pktmbuf_mtod(pi_mbuf, void*
> ));
>
>     IpHeader * pIpHeader = (IpHeader *)(pRawPacket + offsetOfIp);
>
>     if(pIpHeader->GetVersion()==4)
>
>     {
>
>         ipv4_tuple.src_addr =
> rte_be_to_cpu_32(pIpHeader->dwSrcAddressBigEndian);
>
>         ipv4_tuple.dst_addr =
> rte_be_to_cpu_32(pIpHeader->dwDstAddressBigEndian);
>
> [Ori] here you are converting BE to CPU
>
>         ipv4_tuple.sport = *(uint16_t*)(pRawPacket + transportLayerOffset);
>
>         ipv4_tuple.dport = *(uint16_t*)(pRawPacket +
> transportLayerOffset+2);
>
> [Ori] here you keep the values as BE
>
>         input_len = RTE_THASH_V4_L3_LEN;
>
>         return rte_softrss_be((uint32_t *)&ipv4_tuple, input_len,
> rss_key_be);
>
> [Ori] the above function expects to get everything in BE and  you should
> also call rte_convert_rss_key before the use of the above function.
>
>     }
>
>     return 0;
>
> }
>
> new_rss = do_softrss(mbuf_pointer);
>
> std::cout<< std::hex << mbuf_pointer->hash.rss << " -> " << new_rss <<
> std::dec << std::endl;
>
>
>
> And i get a different value than the mbuf_pointer->hash.rss
>
> 5ed28a5c -> 33eb33eb
>
> 974c1896 -> 24e224e2
>
> 1edf1638 -> 21752175
>
> 8a54c19 -> 80638063
>
> 459a6f76 -> 1b351b35
>
> 1cdf1d1c -> e53be53b
>
>
>
>
>
> ****  I understand it is possible to do it, but i don't get the same
> value  ***
>
> ethtool -i ens5f0
>
> driver: mlx5_core
>
> version: 5.8-3.0.7
>
> firmware-version: 22.32.2004 (MT_0000000437)
>
> expansion-rom-version:
>
> bus-info: 0000:83:00.0
>
> supports-statistics: yes
>
> supports-test: yes
>
> supports-eeprom-access: no
>
> supports-register-dump: no
>
> supports-priv-flags: yes
>
>
>

[-- Attachment #2: Type: text/html, Size: 17207 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: rss calculation as the nic
  2024-02-22  6:49     ` Pavel Vazharov
@ 2024-02-25  8:22       ` Ori Kam
  0 siblings, 0 replies; 5+ messages in thread
From: Ori Kam @ 2024-02-25  8:22 UTC (permalink / raw)
  To: Pavel Vazharov, Yaron Illouz; +Cc: dev, users

[-- Attachment #1: Type: text/plain, Size: 3819 bytes --]

Hi Pavel,

You can use a symmetric key,
Or even better, in the case of MLNX devices, you can use RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ_SORT as the hash function.
This sorts the fields based on their value and only then computes the hash, resulting in a symmetric hash.

Best,
Ori


From: Pavel Vazharov <freakpv@gmail.com>
Sent: Thursday, February 22, 2024 8:49 AM
To: Yaron Illouz <yaroni@radcom.com>
Cc: Ori Kam <orika@nvidia.com>; dev@dpdk.org; users@dpdk.org
Subject: Re: rss calculation as the nic



On Wed, Feb 21, 2024 at 9:40 PM Yaron Illouz <yaroni@radcom.com<mailto:yaroni@radcom.com>> wrote:
Hi Ori

Thank you for your answer it helped me

But I can see the load balancing is not done to the same instance for both directions
I would like to have ip + port pairs arrive to same instance

Now ip1+port1 -> ip2+port2 arrive to instance x and ip2+port2 -> ip1+port1 arrive to another instance

How can I solve this?

From: Ori Kam <orika@nvidia.com<mailto:orika@nvidia.com>>
Sent: Wednesday, February 14, 2024 6:07 PM
To: Yaron Illouz <yaroni@radcom.com<mailto:yaroni@radcom.com>>; dev@dpdk.org<mailto:dev@dpdk.org>; 'users@dpdk.org<mailto:users@dpdk.org>' <users@dpdk.org<mailto:users@dpdk.org>>
Subject: RE: rss calculation as the nic

EXTERNAL EMAIL: Do not click links or attachments unless you recognize the sender and know the content is safe

Hi Yaron,

I can see some possible issues with your code, please see below.
If I understand your case correctly, you need to use a symmetric RSS key.
We use this one in production - https://www.ndsl.kaist.edu/~kyoungsoo/papers/TR-symRSS.pdf
And you'll need to set it accordingly when configuring the device:
        port_conf.rx_adv_conf.rss_conf.rss_key     = rss_key;
        port_conf.rx_adv_conf.rss_conf.rss_key_len = rss_key_size;
HTH.


From: Yaron Illouz <yaroni@radcom.com<mailto:yaroni@radcom.com>>
Sent: Saturday, February 3, 2024 7:03 PM
To: dev@dpdk.org<mailto:dev@dpdk.org>; 'users@dpdk.org<mailto:users@dpdk.org>' <users@dpdk.org<mailto:users@dpdk.org>>
Subject: rss calculation as the nic

[Snip]

static inline uint32_t
do_softrss(struct rte_mbuf *m)
{
    uint32_t input_len;
    struct rte_ipv4_tuple ipv4_tuple;

    char * pRawPacket = static_cast<char*>(rte_pktmbuf_mtod(pi_mbuf, void* ));
    IpHeader * pIpHeader = (IpHeader *)(pRawPacket + offsetOfIp);
    if(pIpHeader->GetVersion()==4)
    {
        ipv4_tuple.src_addr = rte_be_to_cpu_32(pIpHeader->dwSrcAddressBigEndian);
        ipv4_tuple.dst_addr = rte_be_to_cpu_32(pIpHeader->dwDstAddressBigEndian);
[Ori] here you are converting BE to CPU
        ipv4_tuple.sport = *(uint16_t*)(pRawPacket + transportLayerOffset);
        ipv4_tuple.dport = *(uint16_t*)(pRawPacket + transportLayerOffset+2);
[Ori] here you keep the values as BE
        input_len = RTE_THASH_V4_L3_LEN;
        return rte_softrss_be((uint32_t *)&ipv4_tuple, input_len, rss_key_be);
[Ori] the above function expects to get everything in BE and  you should also call rte_convert_rss_key before the use of the above function.
    }
    return 0;
}
new_rss = do_softrss(mbuf_pointer);
std::cout<< std::hex << mbuf_pointer->hash.rss << " -> " << new_rss << std::dec << std::endl;

And i get a different value than the mbuf_pointer->hash.rss
5ed28a5c -> 33eb33eb
974c1896 -> 24e224e2
1edf1638 -> 21752175
8a54c19 -> 80638063
459a6f76 -> 1b351b35
1cdf1d1c -> e53be53b


****  I understand it is possible to do it, but i don't get the same value  ***
ethtool -i ens5f0
driver: mlx5_core
version: 5.8-3.0.7
firmware-version: 22.32.2004 (MT_0000000437)
expansion-rom-version:
bus-info: 0000:83:00.0
supports-statistics: yes
supports-test: yes
supports-eeprom-access: no
supports-register-dump: no
supports-priv-flags: yes


[-- Attachment #2: Type: text/html, Size: 24382 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-02-25  8:22 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-03 17:03 rss calculation as the nic Yaron Illouz
2024-02-14 16:07 ` Ori Kam
2024-02-21 17:10   ` Yaron Illouz
2024-02-22  6:49     ` Pavel Vazharov
2024-02-25  8:22       ` Ori Kam

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).