From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id 54C4F592E for ; Fri, 25 Jul 2014 10:04:33 +0200 (CEST) Received: from orsmga001.jf.intel.com ([10.7.209.18]) by orsmga102.jf.intel.com with ESMTP; 25 Jul 2014 01:00:22 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.01,729,1400050800"; d="scan'208";a="548863537" Received: from fmsmsx108.amr.corp.intel.com ([10.19.9.228]) by orsmga001.jf.intel.com with ESMTP; 25 Jul 2014 01:06:02 -0700 Received: from shsmsx104.ccr.corp.intel.com (10.239.4.70) by FMSMSX108.amr.corp.intel.com (10.19.9.228) with Microsoft SMTP Server (TLS) id 14.3.123.3; Fri, 25 Jul 2014 01:06:02 -0700 Received: from shsmsx101.ccr.corp.intel.com ([169.254.1.52]) by SHSMSX104.ccr.corp.intel.com ([169.254.5.204]) with mapi id 14.03.0123.003; Fri, 25 Jul 2014 16:06:00 +0800 From: "Xie, Huawei" To: "Ouyang, Changchun" , "dev@dpdk.org" Thread-Topic: [dpdk-dev] [PATCH v2] virtio: Support mergeable buffer in virtio PMD Thread-Index: AQHPp85A0mBs6WZtwk+TgQBU4+1rLJuwbIsw Date: Fri, 25 Jul 2014 08:05:59 +0000 Message-ID: References: <1406268195-24010-1-git-send-email-changchun.ouyang@intel.com> In-Reply-To: <1406268195-24010-1-git-send-email-changchun.ouyang@intel.com> Accept-Language: en-US Content-Language: en-US X-MS-Has-Attach: X-MS-TNEF-Correlator: x-originating-ip: [10.239.127.40] Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Subject: Re: [dpdk-dev] [PATCH v2] virtio: Support mergeable buffer in virtio PMD X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 25 Jul 2014 08:04:34 -0000 > -----Original Message----- > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang Changchun > Sent: Friday, July 25, 2014 2:03 PM > To: dev@dpdk.org > Subject: [dpdk-dev] [PATCH v2] virtio: Support mergeable buffer in virtio= PMD >=20 > v2 change: > - Resolve conflicts wiht the tip code; > - And resolve 2 issues: > -- fix mbuf leak when discard a uncompleted packet. > -- refine pkt.data to point to actual payload data start point. >=20 > v1 change: > This patch supports mergeable buffer feature in DPDK based virtio PMD, wh= ich > can > receive jumbo frame with larger size, like 3K, 4K or even 9K. >=20 > Signed-off-by: Changchun Ouyang > --- > lib/librte_pmd_virtio/virtio_ethdev.c | 20 ++-- > lib/librte_pmd_virtio/virtio_ethdev.h | 3 + > lib/librte_pmd_virtio/virtio_rxtx.c | 206 ++++++++++++++++++++++++++++= +--- > -- > 3 files changed, 194 insertions(+), 35 deletions(-) >=20 > diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c > b/lib/librte_pmd_virtio/virtio_ethdev.c > index b9f5529..535d798 100644 > --- a/lib/librte_pmd_virtio/virtio_ethdev.c > +++ b/lib/librte_pmd_virtio/virtio_ethdev.c > @@ -337,7 +337,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, > snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone", > dev->data->port_id, queue_idx); > vq->virtio_net_hdr_mz =3D > rte_memzone_reserve_aligned(vq_name, > - vq_size * sizeof(struct virtio_net_hdr), > + vq_size * hw->vtnet_hdr_size, > socket_id, 0, CACHE_LINE_SIZE); > if (vq->virtio_net_hdr_mz =3D=3D NULL) { > rte_free(vq); > @@ -346,7 +346,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, > vq->virtio_net_hdr_mem =3D > vq->virtio_net_hdr_mz->phys_addr; > memset(vq->virtio_net_hdr_mz->addr, 0, > - vq_size * sizeof(struct virtio_net_hdr)); > + vq_size * hw->vtnet_hdr_size); > } else if (queue_type =3D=3D VTNET_CQ) { > /* Allocate a page for control vq command, data and status */ > snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone", > @@ -571,9 +571,6 @@ virtio_negotiate_features(struct virtio_hw *hw) > mask |=3D VIRTIO_NET_F_GUEST_TSO4 | VIRTIO_NET_F_GUEST_TSO6 | > VIRTIO_NET_F_GUEST_ECN; > mask |=3D VTNET_LRO_FEATURES; >=20 > - /* rx_mbuf should not be in multiple merged segments */ > - mask |=3D VIRTIO_NET_F_MRG_RXBUF; > - > /* not negotiating INDIRECT descriptor table support */ > mask |=3D VIRTIO_RING_F_INDIRECT_DESC; >=20 > @@ -746,7 +743,6 @@ eth_virtio_dev_init(__rte_unused struct eth_driver > *eth_drv, > } >=20 > eth_dev->dev_ops =3D &virtio_eth_dev_ops; > - eth_dev->rx_pkt_burst =3D &virtio_recv_pkts; > eth_dev->tx_pkt_burst =3D &virtio_xmit_pkts; >=20 > if (rte_eal_process_type() =3D=3D RTE_PROC_SECONDARY) > @@ -801,10 +797,13 @@ eth_virtio_dev_init(__rte_unused struct eth_driver > *eth_drv, > virtio_negotiate_features(hw); >=20 > /* Setting up rx_header size for the device */ > - if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) > + if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) { > + eth_dev->rx_pkt_burst =3D &virtio_recv_mergeable_pkts; > hw->vtnet_hdr_size =3D sizeof(struct virtio_net_hdr_mrg_rxbuf); > - else > + } else { > + eth_dev->rx_pkt_burst =3D &virtio_recv_pkts; > hw->vtnet_hdr_size =3D sizeof(struct virtio_net_hdr); > + } >=20 > /* Allocate memory for storing MAC addresses */ > eth_dev->data->mac_addrs =3D rte_zmalloc("virtio", ETHER_ADDR_LEN, > 0); > @@ -1009,7 +1008,7 @@ static void virtio_dev_free_mbufs(struct rte_eth_de= v > *dev) >=20 > while ((buf =3D (struct rte_mbuf *)virtqueue_detatch_unused( > dev->data->rx_queues[i])) !=3D NULL) { > - rte_pktmbuf_free_seg(buf); > + rte_pktmbuf_free(buf); > mbuf_num++; > } >=20 > @@ -1028,7 +1027,8 @@ static void virtio_dev_free_mbufs(struct rte_eth_de= v > *dev) > mbuf_num =3D 0; > while ((buf =3D (struct rte_mbuf *)virtqueue_detatch_unused( > dev->data->tx_queues[i])) !=3D NULL) { > - rte_pktmbuf_free_seg(buf); > + rte_pktmbuf_free(buf); > + > mbuf_num++; > } >=20 > diff --git a/lib/librte_pmd_virtio/virtio_ethdev.h > b/lib/librte_pmd_virtio/virtio_ethdev.h > index 858e644..d2e1eed 100644 > --- a/lib/librte_pmd_virtio/virtio_ethdev.h > +++ b/lib/librte_pmd_virtio/virtio_ethdev.h > @@ -104,6 +104,9 @@ int virtio_dev_tx_queue_setup(struct rte_eth_dev *de= v, > uint16_t tx_queue_id, > uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, > uint16_t nb_pkts); >=20 > +uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf > **rx_pkts, > + uint16_t nb_pkts); > + > uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, > uint16_t nb_pkts); >=20 > diff --git a/lib/librte_pmd_virtio/virtio_rxtx.c > b/lib/librte_pmd_virtio/virtio_rxtx.c > index fcd8bd1..3d81b34 100644 > --- a/lib/librte_pmd_virtio/virtio_rxtx.c > +++ b/lib/librte_pmd_virtio/virtio_rxtx.c > @@ -146,6 +146,7 @@ static inline int > virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *coo= kie) > { > struct vq_desc_extra *dxp; > + struct virtio_hw *hw =3D vq->hw; > struct vring_desc *start_dp; > uint16_t needed =3D 1; > uint16_t head_idx, idx; > @@ -165,9 +166,11 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, > struct rte_mbuf *cookie) > dxp->ndescs =3D needed; >=20 > start_dp =3D vq->vq_ring.desc; > - start_dp[idx].addr =3D > - (uint64_t) (cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM > - sizeof(struct virtio_net_hdr)); > - start_dp[idx].len =3D cookie->buf_len - RTE_PKTMBUF_HEADROOM + > sizeof(struct virtio_net_hdr); > + start_dp[idx].addr =3D > + (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM > + - hw->vtnet_hdr_size); > + start_dp[idx].len =3D > + cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw- > >vtnet_hdr_size; > start_dp[idx].flags =3D VRING_DESC_F_WRITE; > idx =3D start_dp[idx].next; > vq->vq_desc_head_idx =3D idx; > @@ -184,8 +187,10 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struc= t > rte_mbuf *cookie) > { > struct vq_desc_extra *dxp; > struct vring_desc *start_dp; > - uint16_t needed =3D 2; > + uint16_t seg_num =3D cookie->pkt.nb_segs; > + uint16_t needed =3D 1 + seg_num; > uint16_t head_idx, idx; > + uint16_t head_size =3D txvq->hw->vtnet_hdr_size; >=20 > if (unlikely(txvq->vq_free_cnt =3D=3D 0)) > return -ENOSPC; > @@ -198,19 +203,25 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, > struct rte_mbuf *cookie) > idx =3D head_idx; > dxp =3D &txvq->vq_descx[idx]; > if (dxp->cookie !=3D NULL) > - rte_pktmbuf_free_seg(dxp->cookie); > + rte_pktmbuf_free(dxp->cookie); > dxp->cookie =3D (void *)cookie; > dxp->ndescs =3D needed; >=20 > start_dp =3D txvq->vq_ring.desc; > - start_dp[idx].addr =3D > - txvq->virtio_net_hdr_mem + idx * sizeof(struct virtio_net_hdr); > - start_dp[idx].len =3D sizeof(struct virtio_net_hdr); > + start_dp[idx].addr =3D > + txvq->virtio_net_hdr_mem + idx * head_size; > + start_dp[idx].len =3D (uint32_t)head_size; > start_dp[idx].flags =3D VRING_DESC_F_NEXT; > - idx =3D start_dp[idx].next; > - start_dp[idx].addr =3D RTE_MBUF_DATA_DMA_ADDR(cookie); > - start_dp[idx].len =3D cookie->pkt.data_len; > - start_dp[idx].flags =3D 0; > + > + for (; ((seg_num > 0) && (cookie !=3D NULL)); seg_num--) { > + idx =3D start_dp[idx].next; > + start_dp[idx].addr =3D RTE_MBUF_DATA_DMA_ADDR(cookie); > + start_dp[idx].len =3D cookie->pkt.data_len; > + start_dp[idx].flags =3D VRING_DESC_F_NEXT; > + cookie =3D cookie->pkt.next; > + } > + > + start_dp[idx].flags &=3D ~VRING_DESC_F_NEXT; > idx =3D start_dp[idx].next; > txvq->vq_desc_head_idx =3D idx; > if (txvq->vq_desc_head_idx =3D=3D VQ_RING_DESC_CHAIN_END) > @@ -284,7 +295,7 @@ virtio_dev_vring_start(struct virtqueue *vq, int > queue_type) > error =3D virtqueue_enqueue_recv_refill(vq, m); >=20 > if (error) { > - rte_pktmbuf_free_seg(m); > + rte_pktmbuf_free(m); > break; > } > nbufs++; > @@ -423,7 +434,7 @@ virtio_discard_rxbuf(struct virtqueue *vq, struct > rte_mbuf *m) > error =3D virtqueue_enqueue_recv_refill(vq, m); > if (unlikely(error)) { > RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf"); > - rte_pktmbuf_free_seg(m); > + rte_pktmbuf_free(m); > } > } >=20 > @@ -471,17 +482,158 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf > **rx_pkts, uint16_t nb_pkts) >=20 > rxm->pkt.in_port =3D rxvq->port_id; > rxm->pkt.data =3D (char *)rxm->buf_addr + > RTE_PKTMBUF_HEADROOM; > + > rxm->pkt.nb_segs =3D 1; > rxm->pkt.next =3D NULL; > - rxm->pkt.pkt_len =3D (uint32_t)(len[i] > - - sizeof(struct virtio_net_hdr)); > - rxm->pkt.data_len =3D (uint16_t)(len[i] > - - sizeof(struct virtio_net_hdr)); > + rxm->pkt.pkt_len =3D (uint32_t)(len[i] - hw->vtnet_hdr_size); > + rxm->pkt.data_len =3D (uint16_t)(len[i] - hw->vtnet_hdr_size); >=20 > VIRTIO_DUMP_PACKET(rxm, rxm->pkt.data_len); >=20 > rx_pkts[nb_rx++] =3D rxm; > - rxvq->bytes +=3D len[i] - sizeof(struct virtio_net_hdr); > + rxvq->bytes +=3D rx_pkts[nb_rx - 1]->pkt.pkt_len; > + } > + > + rxvq->packets +=3D nb_rx; > + > + /* Allocate new mbuf for the used descriptor */ > + error =3D ENOSPC; > + while (likely(!virtqueue_full(rxvq))) { > + new_mbuf =3D rte_rxmbuf_alloc(rxvq->mpool); > + if (unlikely(new_mbuf =3D=3D NULL)) { > + struct rte_eth_dev *dev > + =3D &rte_eth_devices[rxvq->port_id]; > + dev->data->rx_mbuf_alloc_failed++; > + break; > + } > + error =3D virtqueue_enqueue_recv_refill(rxvq, new_mbuf); > + if (unlikely(error)) { > + rte_pktmbuf_free(new_mbuf); > + break; > + } > + nb_enqueued++; > + } > + > + if (likely(nb_enqueued)) { > + if (unlikely(virtqueue_kick_prepare(rxvq))) { > + virtqueue_notify(rxvq); > + PMD_RX_LOG(DEBUG, "Notified\n"); > + } > + } > + > + vq_update_avail_idx(rxvq); > + > + return nb_rx; > +} > + > +uint16_t > +virtio_recv_mergeable_pkts(void *rx_queue, > + struct rte_mbuf **rx_pkts, > + uint16_t nb_pkts) > +{ > + struct virtqueue *rxvq =3D rx_queue; > + struct virtio_hw *hw =3D rxvq->hw; > + struct rte_mbuf *rxm, *new_mbuf; > + uint16_t nb_used, num, nb_rx =3D 0; > + uint32_t len[VIRTIO_MBUF_BURST_SZ]; > + struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ]; > + struct rte_mbuf *prev; > + int error; > + uint32_t i =3D 0, nb_enqueued =3D 0; > + uint32_t seg_num =3D 0; > + uint16_t extra_idx =3D 0; > + > + nb_used =3D VIRTQUEUE_NUSED(rxvq); > + > + rmb(); > + > + if (nb_used =3D=3D 0) > + return 0; > + > + while (i < nb_used) { > + struct virtio_net_hdr_mrg_rxbuf *header; > + char *head_ptr; > + > + if (nb_rx >=3D nb_pkts) > + break; > + > + num =3D virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1); > + i +=3D num; > + > + PMD_RX_LOG(DEBUG, "used:%d dequeue:%d\n", nb_used, > num); > + PMD_RX_LOG(DEBUG, "packet len:%d\n", len[i]); > + > + rxm =3D rcv_pkts[0]; > + extra_idx =3D 0; > + > + if (unlikely(len[0] > + < (uint32_t)hw->vtnet_hdr_size + ETHER_HDR_LEN)) > { > + PMD_RX_LOG(ERR, "Packet drop\n"); > + nb_enqueued++; > + virtio_discard_rxbuf(rxvq, rxm); > + rxvq->errors++; > + continue; > + } > + > + head_ptr =3D (char *)rxm->pkt.data; > + head_ptr -=3D hw->vtnet_hdr_size; > + header =3D (struct virtio_net_hdr_mrg_rxbuf *)head_ptr; > + seg_num =3D header->num_buffers; > + > + if (seg_num =3D=3D 0) > + seg_num =3D 1; > + > + rxm->pkt.data =3D (char *)rxm->buf_addr + > RTE_PKTMBUF_HEADROOM; > + rxm->pkt.nb_segs =3D seg_num; > + rxm->pkt.next =3D NULL; > + rxm->pkt.pkt_len =3D (uint32_t)(len[0] - hw->vtnet_hdr_size); > + rxm->pkt.data_len =3D (uint16_t)(len[0] - hw->vtnet_hdr_size); > + > + rxm->pkt.in_port =3D rxvq->port_id; > + rx_pkts[nb_rx] =3D rxm; > + > + prev =3D rxm; > + > + VIRTIO_DUMP_PACKET(rxm, rxm->pkt.data_len); Here it might cause segmentation fault when debug is enabled. > + > + /* > + * Get extra segments for current uncompleted packet. > + */ > + if (VIRTQUEUE_NUSED(rxvq) >=3D seg_num - 1) { > + uint32_t rx_num =3D virtqueue_dequeue_burst_rx(rxvq, > + rcv_pkts, len, seg_num - 1); > + i +=3D rx_num; > + } else { > + PMD_RX_LOG(ERR, "No enough segments for > packet.\n"); > + nb_enqueued++; > + virtio_discard_rxbuf(rxvq, rxm); > + rxvq->errors++; > + break; Here we should figure out if virtio net spec specify the behavior. If the b= ackend wants to en-queue a merge able packet, but there isn't enough availa= ble descriptors, could it do it in a non-atomic way, i.e., firstly transfer= some of them, update the used->idx and later transfer the rest of them?=20 If that is the case, the handling here will cause the content of next segm= ent later en-queued to be treated as virtio merge-able header. > + } > + > + while (extra_idx < seg_num - 1) { > + rxm =3D rcv_pkts[extra_idx]; > + > + rxm->pkt.data =3D > + (char *)rxm->buf_addr + > RTE_PKTMBUF_HEADROOM > + - hw->vtnet_hdr_size; > + rxm->pkt.next =3D NULL; > + rxm->pkt.pkt_len =3D (uint32_t)(len[extra_idx]); > + rxm->pkt.data_len =3D (uint16_t)(len[extra_idx]); > + > + if (prev) > + prev->pkt.next =3D rxm; > + > + prev =3D rxm; > + rx_pkts[nb_rx]->pkt.pkt_len +=3D rxm->pkt.pkt_len; > + > + extra_idx++; > + > + VIRTIO_DUMP_PACKET(rxm, rxm->pkt.data_len); > + }; > + > + rxvq->bytes +=3D rx_pkts[nb_rx]->pkt.pkt_len; > + nb_rx++; > } >=20 > rxvq->packets +=3D nb_rx; > @@ -498,11 +650,12 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf > **rx_pkts, uint16_t nb_pkts) > } > error =3D virtqueue_enqueue_recv_refill(rxvq, new_mbuf); > if (unlikely(error)) { > - rte_pktmbuf_free_seg(new_mbuf); > + rte_pktmbuf_free(new_mbuf); > break; > } > nb_enqueued++; > } > + > if (likely(nb_enqueued)) { > if (unlikely(virtqueue_kick_prepare(rxvq))) { > virtqueue_notify(rxvq); > @@ -536,12 +689,15 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf > **tx_pkts, uint16_t nb_pkts) > num =3D (uint16_t)(likely(nb_used < VIRTIO_MBUF_BURST_SZ) ? nb_used : > VIRTIO_MBUF_BURST_SZ); >=20 > while (nb_tx < nb_pkts) { > - if (virtqueue_full(txvq) && num) { > - virtqueue_dequeue_pkt_tx(txvq); > - num--; > + int need =3D tx_pkts[nb_tx]->pkt.nb_segs - txvq->vq_free_cnt; > + if ((need > 0) && (num > 0)) { > + do { > + virtqueue_dequeue_pkt_tx(txvq); > + num--; > + } while (num > 0); > } >=20 > - if (!virtqueue_full(txvq)) { > + if (tx_pkts[nb_tx]->pkt.nb_segs <=3D txvq->vq_free_cnt) { > txm =3D tx_pkts[nb_tx]; > /* Enqueue Packet buffers */ > error =3D virtqueue_enqueue_xmit(txvq, txm); > @@ -555,7 +711,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf > **tx_pkts, uint16_t nb_pkts) > break; > } > nb_tx++; > - txvq->bytes +=3D txm->pkt.data_len; > + txvq->bytes +=3D txm->pkt.pkt_len; > } else { > PMD_TX_LOG(ERR, "No free tx descriptors to transmit"); > break; > -- > 1.8.4.2