From: "Wiles, Keith" <keith.wiles@intel.com>
To: Ophir Munk <ophirmu@mellanox.com>
Cc: "dev@dpdk.org" <dev@dpdk.org>,
Pascal Mazon <pascal.mazon@6wind.com>,
Thomas Monjalon <thomas@monjalon.net>,
Olga Shern <olgas@mellanox.com>
Subject: Re: [dpdk-dev] [PATCH v4 1/2] net/tap: calculate checksums of multi segs packets
Date: Tue, 12 Jun 2018 17:17:35 +0000 [thread overview]
Message-ID: <848DE22E-9581-4F40-AEF8-52EDEC425DFE@intel.com> (raw)
In-Reply-To: <1528821108-12405-2-git-send-email-ophirmu@mellanox.com>
A few formatting problems I have noticed. We can review the code logic in a meeting.
> On Jun 12, 2018, at 11:31 AM, Ophir Munk <ophirmu@mellanox.com> wrote:
>
> Prior to this commit IP/UDP/TCP checksum offload calculations
> were skipped in case of a multi segments packet.
> This commit enables TAP checksum calculations for multi segments
> packets.
> The only restriction is that the first segment must contain
> headers of layers 3 (IP) and 4 (UDP or TCP)
>
> Reviewed-by: Raslan Darawsheh <rasland@mellanox.com>
> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> ---
> drivers/net/tap/rte_eth_tap.c | 158 +++++++++++++++++++++++++++++-------------
> 1 file changed, 110 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index df396bf..c19f053 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -415,12 +415,43 @@ tap_tx_offload_get_queue_capa(void)
> DEV_TX_OFFLOAD_TCP_CKSUM;
> }
>
> +/* Finalize l4 checksum calculation */
> static void
> -tap_tx_offload(char *packet, uint64_t ol_flags, unsigned int l2_len,
> - unsigned int l3_len)
> +tap_tx_l4_cksum(uint16_t *l4_cksum, uint16_t l4_phdr_cksum,
> + uint32_t l4_raw_cksum)
> {
> - void *l3_hdr = packet + l2_len;
> + if (l4_cksum) {
> + uint32_t cksum;
> +
> + cksum = __rte_raw_cksum_reduce(l4_raw_cksum);
> + cksum += l4_phdr_cksum;
> +
> + cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> + cksum = (~cksum) & 0xffff;
> + if (cksum == 0)
> + cksum = 0xffff;
> + *l4_cksum = cksum;
> + }
> +}
>
> +/* Accumaulate L4 raw checksums */
> +static void
> +tap_tx_l4_add_rcksum(char *l4_data, unsigned int l4_len, uint16_t *l4_cksum,
> + uint32_t *l4_raw_cksum)
> +{
> + if (l4_cksum == NULL)
> + return;
> +
> + *l4_raw_cksum = __rte_raw_cksum(l4_data, l4_len, *l4_raw_cksum);
> +}
> +
> +/* L3 and L4 pseudo headers checksum offloads */
> +static void
> +tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len,
> + unsigned int l3_len, unsigned int l4_len, uint16_t **l4_cksum,
> + uint16_t *l4_phdr_cksum, uint32_t *l4_raw_cksum)
> +{
> + void *l3_hdr = packet + l2_len;
Needs a blank line here.
> if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4)) {
> struct ipv4_hdr *iph = l3_hdr;
> uint16_t cksum;
> @@ -430,38 +461,21 @@ tap_tx_offload(char *packet, uint64_t ol_flags, unsigned int l2_len,
> iph->hdr_checksum = (cksum == 0xffff) ? cksum : ~cksum;
> }
> if (ol_flags & PKT_TX_L4_MASK) {
> - uint16_t l4_len;
> - uint32_t cksum;
> - uint16_t *l4_cksum;
> void *l4_hdr;
>
> l4_hdr = packet + l2_len + l3_len;
> if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM)
> - l4_cksum = &((struct udp_hdr *)l4_hdr)->dgram_cksum;
> + *l4_cksum = &((struct udp_hdr *)l4_hdr)->dgram_cksum;
> else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM)
> - l4_cksum = &((struct tcp_hdr *)l4_hdr)->cksum;
> + *l4_cksum = &((struct tcp_hdr *)l4_hdr)->cksum;
> else
> return;
> - *l4_cksum = 0;
> - if (ol_flags & PKT_TX_IPV4) {
> - struct ipv4_hdr *iph = l3_hdr;
> -
> - l4_len = rte_be_to_cpu_16(iph->total_length) - l3_len;
> - cksum = rte_ipv4_phdr_cksum(l3_hdr, 0);
> - } else {
> - struct ipv6_hdr *ip6h = l3_hdr;
> -
> - /* payload_len does not include ext headers */
> - l4_len = rte_be_to_cpu_16(ip6h->payload_len) -
> - l3_len + sizeof(struct ipv6_hdr);
> - cksum = rte_ipv6_phdr_cksum(l3_hdr, 0);
> - }
> - cksum += rte_raw_cksum(l4_hdr, l4_len);
> - cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> - cksum = (~cksum) & 0xffff;
> - if (cksum == 0)
> - cksum = 0xffff;
> - *l4_cksum = cksum;
> + **l4_cksum = 0;
> + if (ol_flags & PKT_TX_IPV4)
> + *l4_phdr_cksum = rte_ipv4_phdr_cksum(l3_hdr, 0);
> + else
> + *l4_phdr_cksum = rte_ipv6_phdr_cksum(l3_hdr, 0);
> + *l4_raw_cksum = __rte_raw_cksum(l4_hdr, l4_len, 0);
> }
> }
>
> @@ -482,17 +496,27 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> max_size = *txq->mtu + (ETHER_HDR_LEN + ETHER_CRC_LEN + 4);
> for (i = 0; i < nb_pkts; i++) {
> struct rte_mbuf *mbuf = bufs[num_tx];
> - struct iovec iovecs[mbuf->nb_segs + 1];
> + struct iovec iovecs[mbuf->nb_segs + 2];
> struct tun_pi pi = { .flags = 0, .proto = 0x00 };
> struct rte_mbuf *seg = mbuf;
> char m_copy[mbuf->data_len];
> + int proto;
> int n;
> int j;
> + int k; /* first index in iovecs for copying segments */
> + uint16_t l234_hlen; /* length of layers 2,3,4 headers */
> + uint16_t seg_len; /* length of first segment */
> + uint16_t nb_segs;
> + uint16_t *l4_cksum; /* l4 checksum (pseudo header + payload) */
> + uint32_t l4_raw_cksum = 0; /* TCP/UDP payload raw checksum */
> + uint16_t l4_phdr_cksum = 0; /* TCP/UDP pseudo header checksum */
> + uint16_t is_cksum = 0; /* in case cksum should be offloaded */
>
> /* stats.errs will be incremented */
> if (rte_pktmbuf_pkt_len(mbuf) > max_size)
> break;
>
> + l4_cksum = NULL;
> if (txq->type == ETH_TUNTAP_TYPE_TUN) {
> /*
> * TUN and TAP are created with IFF_NO_PI disabled.
> @@ -505,35 +529,73 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> * is 4 or 6, then protocol field is updated.
> */
> char *buff_data = rte_pktmbuf_mtod(seg, void *);
> - j = (*buff_data & 0xf0);
> - pi.proto = (j == 0x40) ? rte_cpu_to_be_16(ETHER_TYPE_IPv4) :
> - (j == 0x60) ? rte_cpu_to_be_16(ETHER_TYPE_IPv6) : 0x00;
> + proto = (*buff_data & 0xf0);
> + pi.proto = (proto == 0x40) ?
> + rte_cpu_to_be_16(ETHER_TYPE_IPv4) :
> + ((proto == 0x60) ?
> + rte_cpu_to_be_16(ETHER_TYPE_IPv6) :
> + 0x00);
> }
>
> - iovecs[0].iov_base = π
> - iovecs[0].iov_len = sizeof(pi);
> - for (j = 1; j <= mbuf->nb_segs; j++) {
> - iovecs[j].iov_len = rte_pktmbuf_data_len(seg);
> - iovecs[j].iov_base =
> - rte_pktmbuf_mtod(seg, void *);
> - seg = seg->next;
> - }
> + k = 0;
> + iovecs[k].iov_base = π
> + iovecs[k].iov_len = sizeof(pi);
> + k++;
Blank lines are always good.
> + nb_segs = mbuf->nb_segs;
> if (txq->csum &&
> ((mbuf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4) ||
> (mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM ||
> (mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM))) {
> - /* Support only packets with all data in the same seg */
> - if (mbuf->nb_segs > 1)
> + is_cksum = 1;
Blank line would be nice.
> + /* Support only packets with at least layer 4
> + * header included in the first segment
> + */
> + seg_len = rte_pktmbuf_data_len(mbuf);
> + l234_hlen = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
> + if (seg_len < l234_hlen)
> break;
> - /* To change checksums, work on a copy of data. */
> +
> + /* To change checksums, work on a
> + * copy of l2, l3 l4 headers.
> + */
> rte_memcpy(m_copy, rte_pktmbuf_mtod(mbuf, void *),
> - rte_pktmbuf_data_len(mbuf));
> - tap_tx_offload(m_copy, mbuf->ol_flags,
> - mbuf->l2_len, mbuf->l3_len);
> - iovecs[1].iov_base = m_copy;
> + l234_hlen);
> + tap_tx_l3_cksum(m_copy, mbuf->ol_flags,
> + mbuf->l2_len, mbuf->l3_len, mbuf->l4_len,
> + &l4_cksum, &l4_phdr_cksum,
> + &l4_raw_cksum);
> + iovecs[k].iov_base = m_copy;
> + iovecs[k].iov_len = l234_hlen;
> + k++;
Some blank lines would make this a lot more readable, like one here.
> + /* Update next iovecs[] beyond l2, l3, l4 headers */
> + if (seg_len > l234_hlen) {
> + iovecs[k].iov_len = seg_len - l234_hlen;
> + iovecs[k].iov_base =
> + rte_pktmbuf_mtod(seg, char *) +
> + l234_hlen;
> + tap_tx_l4_add_rcksum(iovecs[k].iov_base,
> + iovecs[k].iov_len, l4_cksum,
> + &l4_raw_cksum);
> + k++;
> + nb_segs++;
> + }
> + seg = seg->next;
> }
Another place a blank line would be nice.
> + for (j = k; j <= nb_segs; j++) {
> + iovecs[j].iov_len = rte_pktmbuf_data_len(seg);
> + iovecs[j].iov_base = rte_pktmbuf_mtod(seg, void *);
> + if (is_cksum)
> + tap_tx_l4_add_rcksum(iovecs[j].iov_base,
> + iovecs[j].iov_len, l4_cksum,
> + &l4_raw_cksum);
> + seg = seg->next;
> + }
> +
> + if (is_cksum)
> + tap_tx_l4_cksum(l4_cksum, l4_phdr_cksum, l4_raw_cksum);
> +
> /* copy the tx frame data */
> - n = writev(txq->fd, iovecs, mbuf->nb_segs + 1);
> + n = writev(txq->fd, iovecs, j);
> if (n <= 0)
> break;
>
> --
> 2.7.4
>
Regards,
Keith
next prev parent reply other threads:[~2018-06-12 17:17 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-09 21:10 [dpdk-dev] [RFC 0/2] TAP TSO Implementation Ophir Munk
2018-03-09 21:10 ` [dpdk-dev] [RFC 1/2] net/tap: calculate checksum for multi segs packets Ophir Munk
2018-04-09 22:33 ` [dpdk-dev] [PATCH v1 0/2] TAP TSO Ophir Munk
2018-04-09 22:33 ` [dpdk-dev] [PATCH v1 1/2] net/tap: calculate checksums of multi segs packets Ophir Munk
2018-04-09 22:33 ` [dpdk-dev] [PATCH v1 2/2] net/tap: support TSO (TCP Segment Offload) Ophir Munk
2018-04-22 11:30 ` [dpdk-dev] [PATCH v2 0/2] TAP TSO Ophir Munk
2018-04-22 11:30 ` [dpdk-dev] [PATCH v2 1/2] net/tap: calculate checksums of multi segs packets Ophir Munk
2018-05-07 21:54 ` [dpdk-dev] [PATCH v3 0/2] TAP TSO Ophir Munk
2018-05-07 21:54 ` [dpdk-dev] [PATCH v3 1/2] net/tap: calculate checksums of multi segs packets Ophir Munk
2018-05-07 21:54 ` [dpdk-dev] [PATCH v3 2/2] net/tap: support TSO (TCP Segment Offload) Ophir Munk
2018-05-31 13:52 ` [dpdk-dev] [PATCH v2 1/2] net/tap: calculate checksums of multi segs packets Ferruh Yigit
2018-05-31 13:54 ` Ferruh Yigit
2018-04-22 11:30 ` [dpdk-dev] [PATCH v2 2/2] net/tap: support TSO (TCP Segment Offload) Ophir Munk
2018-06-12 16:31 ` [dpdk-dev] [PATCH v4 0/2] TAP TSO Ophir Munk
2018-06-12 16:31 ` [dpdk-dev] [PATCH v4 1/2] net/tap: calculate checksums of multi segs packets Ophir Munk
2018-06-12 17:17 ` Wiles, Keith [this message]
2018-06-12 16:31 ` [dpdk-dev] [PATCH v4 2/2] net/tap: support TSO (TCP Segment Offload) Ophir Munk
2018-06-12 17:22 ` Wiles, Keith
2018-06-13 16:04 ` Wiles, Keith
2018-06-14 7:59 ` Ophir Munk
2018-06-14 12:58 ` Wiles, Keith
2018-06-23 23:17 ` [dpdk-dev] [PATCH v5 0/2] TAP TSO Ophir Munk
2018-06-23 23:17 ` [dpdk-dev] [PATCH v5 1/2] net/tap: calculate checksums of multi segs packets Ophir Munk
2018-06-24 13:45 ` Wiles, Keith
2018-06-27 13:11 ` Ferruh Yigit
2018-06-23 23:17 ` [dpdk-dev] [PATCH v5 2/2] net/tap: support TSO (TCP Segment Offload) Ophir Munk
2018-03-09 21:10 ` [dpdk-dev] [RFC 2/2] net/tap: implement TAP TSO Ophir Munk
2018-04-09 16:38 ` [dpdk-dev] [RFC 0/2] TAP TSO Implementation Ferruh Yigit
2018-04-09 22:37 ` Ophir Munk
2018-04-10 14:30 ` Ferruh Yigit
2018-04-10 15:31 ` Ophir Munk
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=848DE22E-9581-4F40-AEF8-52EDEC425DFE@intel.com \
--to=keith.wiles@intel.com \
--cc=dev@dpdk.org \
--cc=olgas@mellanox.com \
--cc=ophirmu@mellanox.com \
--cc=pascal.mazon@6wind.com \
--cc=thomas@monjalon.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).