From: William Tu
Date: Tue, 11 Jun 2019 09:16:45 -0700
Subject: Re: [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf
To: Xiaolong Ye
Cc: Qi Zhang, John McNamara, Marko Kovacevic, Karlsson Magnus,
 Topel Bjorn, dev@dpdk.org
In-Reply-To: <20190530090707.36290-2-xiaolong.ye@intel.com>

On Thu, May 30, 2019 at 2:16 AM Xiaolong Ye wrote:
>
> Implement zero copy of af_xdp pmd through mbuf's external memory
> mechanism to achieve high performance.
>
> This patch also provides a new parameter "pmd_zero_copy" for user, so they
> can choose to enable zero copy of af_xdp pmd or not.
>
> To be clear, "zero copy" here is different from the "zero copy mode" of
> AF_XDP, it is about zero copy between af_xdp umem and mbuf used in dpdk
> application.
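
For other readers trying this out: the new devarg is passed at vdev
creation time, so a testpmd invocation would look something like the
following (the core list and interface name here are only illustrative):

  ./testpmd -l 0-1 --vdev net_af_xdp,iface=ens786f1,queue=0,pmd_zero_copy=1 -- -i
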
>
> Suggested-by: Varghese Vipin
> Suggested-by: Tummala Sivaprasad
> Suggested-by: Olivier Matz
> Signed-off-by: Xiaolong Ye
> ---
>  doc/guides/nics/af_xdp.rst          |   1 +
>  drivers/net/af_xdp/rte_eth_af_xdp.c | 104 +++++++++++++++++++++-------
>  2 files changed, 79 insertions(+), 26 deletions(-)
>
> diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
> index 175038153..0bd4239fe 100644
> --- a/doc/guides/nics/af_xdp.rst
> +++ b/doc/guides/nics/af_xdp.rst
> @@ -28,6 +28,7 @@ The following options can be provided to set up an af_xdp port in DPDK.
>
>  * ``iface`` - name of the Kernel interface to attach to (required);
>  * ``queue`` - netdev queue id (optional, default 0);
> +* ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
>
>  Prerequisites
>  -------------
> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index 35c72272c..014cd5691 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> @@ -70,6 +70,7 @@ struct xsk_umem_info {
>  	struct xsk_umem *umem;
>  	struct rte_ring *buf_ring;
>  	const struct rte_memzone *mz;
> +	int pmd_zc;
>  };
>
>  struct rx_stats {
> @@ -109,8 +110,8 @@ struct pmd_internals {
>  	int if_index;
>  	char if_name[IFNAMSIZ];
>  	uint16_t queue_idx;
> +	int pmd_zc;
>  	struct ether_addr eth_addr;
> -	struct xsk_umem_info *umem;
>  	struct rte_mempool *mb_pool_share;
>
>  	struct pkt_rx_queue rx_queues[ETH_AF_XDP_MAX_QUEUE_PAIRS];
> @@ -119,10 +120,12 @@ struct pmd_internals {
>
>  #define ETH_AF_XDP_IFACE_ARG		"iface"
>  #define ETH_AF_XDP_QUEUE_IDX_ARG	"queue"
> +#define ETH_AF_XDP_PMD_ZC_ARG		"pmd_zero_copy"
>
>  static const char * const valid_arguments[] = {
>  	ETH_AF_XDP_IFACE_ARG,
>  	ETH_AF_XDP_QUEUE_IDX_ARG,
> +	ETH_AF_XDP_PMD_ZC_ARG,
>  	NULL
>  };
>
> @@ -166,6 +169,15 @@ reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
>  	return 0;
>  }
>
> +static void
> +umem_buf_release_to_fq(void *addr, void *opaque)
> +{
> +	struct xsk_umem_info *umem = (struct xsk_umem_info *)opaque;
> +	uint64_t umem_addr = (uint64_t)addr - umem->mz->addr_64;
> +
> +	rte_ring_enqueue(umem->buf_ring, (void *)umem_addr);
> +}
> +
>  static uint16_t
>  eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  {
> @@ -175,6 +187,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  	struct xsk_ring_prod *fq = &umem->fq;
>  	uint32_t idx_rx = 0;
>  	uint32_t free_thresh = fq->size >> 1;
> +	int pmd_zc = umem->pmd_zc;
>  	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
>  	unsigned long dropped = 0;
>  	unsigned long rx_bytes = 0;
> @@ -197,19 +210,29 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  		uint64_t addr;
>  		uint32_t len;
>  		void *pkt;
> +		uint16_t buf_len = ETH_AF_XDP_FRAME_SIZE;
> +		struct rte_mbuf_ext_shared_info *shinfo;
>
>  		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
>  		addr = desc->addr;
>  		len = desc->len;
>  		pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr);
>
> -		rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len);
> +		if (pmd_zc) {
> +			shinfo = rte_pktmbuf_ext_shinfo_init_helper(pkt,
> +					&buf_len, umem_buf_release_to_fq, umem);
> +
> +			rte_pktmbuf_attach_extbuf(mbufs[i], pkt, 0, buf_len,
> +						  shinfo);
> +		} else {
> +			rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *),
> +					pkt, len);
> +			rte_ring_enqueue(umem->buf_ring, (void *)addr);
> +		}
>  		rte_pktmbuf_pkt_len(mbufs[i]) = len;
>  		rte_pktmbuf_data_len(mbufs[i]) = len;
>  		rx_bytes += len;
>  		bufs[i] = mbufs[i];
> -
> -		rte_ring_enqueue(umem->buf_ring, (void *)addr);
>  	}
>
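
A note on the shinfo helper used in the pmd_zc branch above, since it
took me a moment: as I understand it, rte_pktmbuf_ext_shinfo_init_helper()
carves the shared info out of the tail of the buffer it is handed, which
is why buf_len comes back smaller than ETH_AF_XDP_FRAME_SIZE. Roughly
(my paraphrase, not the exact DPDK source):

	/* Place the shared info at the aligned tail of the umem frame
	 * and shrink the usable length so payload cannot overwrite it. */
	void *buf_end = RTE_PTR_ADD(pkt, ETH_AF_XDP_FRAME_SIZE);

	shinfo = RTE_PTR_ALIGN_FLOOR(RTE_PTR_SUB(buf_end, sizeof(*shinfo)),
				     sizeof(uintptr_t));
	shinfo->free_cb = umem_buf_release_to_fq;
	shinfo->fcb_opaque = umem;
	rte_mbuf_ext_refcnt_set(shinfo, 1);
	buf_len = (uint16_t)RTE_PTR_DIFF(shinfo, pkt); /* usable bytes */

So each zero-copy mbuf borrows a few bytes at the end of its own umem
frame for the refcount, and the frame only returns to buf_ring once the
last reference is dropped.
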
>  	xsk_ring_cons__release(rx, rcvd);
> @@ -262,12 +285,21 @@ kick_tx(struct pkt_tx_queue *txq)
>  	pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
>  }
>
> +static inline bool
> +in_umem_range(struct xsk_umem_info *umem, uint64_t addr)
> +{
> +	uint64_t mz_base_addr = umem->mz->addr_64;
> +
> +	return addr >= mz_base_addr && addr < mz_base_addr + umem->mz->len;
> +}
> +
>  static uint16_t
>  eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  {
>  	struct pkt_tx_queue *txq = queue;
>  	struct xsk_umem_info *umem = txq->pair->umem;
>  	struct rte_mbuf *mbuf;
> +	int pmd_zc = umem->pmd_zc;
>  	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
>  	unsigned long tx_bytes = 0;
>  	int i;
> @@ -294,16 +326,26 @@
>
>  		desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
>  		mbuf = bufs[i];
> -
> -		desc->addr = (uint64_t)addrs[i];
>  		desc->len = mbuf->pkt_len;
> -		pkt = xsk_umem__get_data(umem->mz->addr,
> -					 desc->addr);
> -		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
> -			   desc->len);
> -		tx_bytes += mbuf->pkt_len;
>
> -		rte_pktmbuf_free(mbuf);
> +		/*
> +		 * We need to make sure the external mbuf address is within
> +		 * current port's umem memzone range
> +		 */
> +		if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) &&
> +				in_umem_range(umem, (uint64_t)mbuf->buf_addr)) {
> +			desc->addr = (uint64_t)mbuf->buf_addr -
> +					umem->mz->addr_64;
> +			mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr,
> +					(uint64_t)addrs[i]);
> +		} else {
> +			desc->addr = (uint64_t)addrs[i];
> +			pkt = xsk_umem__get_data(umem->mz->addr,
> +					desc->addr);
> +			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
> +					desc->len);
> +		}
> +		tx_bytes += mbuf->pkt_len;
>  	}
>
>  	xsk_ring_prod__submit(&txq->tx, nb_pkts);
> @@ -313,6 +355,9 @@
>  	txq->stats.tx_pkts += nb_pkts;
>  	txq->stats.tx_bytes += tx_bytes;
>
> +	for (i = 0; i < nb_pkts; i++)
> +		rte_pktmbuf_free(bufs[i]);
> +

Is it OK to free the mbuf here? If AF_XDP is running with pmd_zc=true,
the packet mbuf is still in the tx ring and might not have been sent out
yet.

Regards,
William

>  	return nb_pkts;
>  }
>
> @@ -454,18 +499,16 @@ eth_dev_close(struct rte_eth_dev *dev)
>  		if (rxq->umem == NULL)
>  			break;
>  		xsk_socket__delete(rxq->xsk);
> +		(void)xsk_umem__delete(rxq->umem->umem);
> +		xdp_umem_destroy(rxq->umem);
>  	}
>
> -	(void)xsk_umem__delete(internals->umem->umem);
> -
>  	/*
>  	 * MAC is not allocated dynamically, setting it to NULL would prevent
>  	 * from releasing it in rte_eth_dev_release_port.
> */ > dev->data->mac_addrs = NULL; > > - xdp_umem_destroy(internals->umem); > - > remove_xdp_program(internals); > } > > @@ -639,7 +682,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, > goto err; > } > > - internals->umem = rxq->umem; > + rxq->umem->pmd_zc = internals->pmd_zc; > > dev->data->rx_queues[rx_queue_id] = rxq; > return 0; > @@ -775,9 +818,8 @@ parse_name_arg(const char *key __rte_unused, > } > > static int > -parse_parameters(struct rte_kvargs *kvlist, > - char *if_name, > - int *queue_idx) > +parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx, > + int *pmd_zc) > { > int ret; > > @@ -791,6 +833,11 @@ parse_parameters(struct rte_kvargs *kvlist, > if (ret < 0) > goto free_kvlist; > > + ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG, > + &parse_integer_arg, pmd_zc); > + if (ret < 0) > + goto free_kvlist; > + > free_kvlist: > rte_kvargs_free(kvlist); > return ret; > @@ -827,9 +874,8 @@ get_iface_info(const char *if_name, > } > > static struct rte_eth_dev * > -init_internals(struct rte_vdev_device *dev, > - const char *if_name, > - int queue_idx) > +init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx, > + int pmd_zc) > { > const char *name = rte_vdev_device_name(dev); > const unsigned int numa_node = dev->device.numa_node; > @@ -843,6 +889,7 @@ init_internals(struct rte_vdev_device *dev, > return NULL; > > internals->queue_idx = queue_idx; > + internals->pmd_zc = pmd_zc; > strlcpy(internals->if_name, if_name, IFNAMSIZ); > > for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) { > @@ -868,6 +915,9 @@ init_internals(struct rte_vdev_device *dev, > /* Let rte_eth_dev_close() release the port resources. */ > eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; > > + if (internals->pmd_zc) > + AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n"); > + > return eth_dev; > > err: > @@ -883,6 +933,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) > int xsk_queue_idx = ETH_AF_XDP_DFLT_QUEUE_IDX; > struct rte_eth_dev *eth_dev = NULL; > const char *name; > + int pmd_zc = 0; > > AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", > rte_vdev_device_name(dev)); > @@ -909,7 +960,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) > if (dev->device.numa_node == SOCKET_ID_ANY) > dev->device.numa_node = rte_socket_id(); > > - if (parse_parameters(kvlist, if_name, &xsk_queue_idx) < 0) { > + if (parse_parameters(kvlist, if_name, &xsk_queue_idx, &pmd_zc) < 0) { > AF_XDP_LOG(ERR, "Invalid kvargs value\n"); > return -EINVAL; > } > @@ -919,7 +970,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) > return -EINVAL; > } > > - eth_dev = init_internals(dev, if_name, xsk_queue_idx); > + eth_dev = init_internals(dev, if_name, xsk_queue_idx, pmd_zc); > if (eth_dev == NULL) { > AF_XDP_LOG(ERR, "Failed to init internals\n"); > return -1; > @@ -961,7 +1012,8 @@ static struct rte_vdev_driver pmd_af_xdp_drv = { > RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv); > RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp, > "iface= " > - "queue= "); > + "queue= " > + "pmd_zero_copy=<0|1>"); > > RTE_INIT(af_xdp_init_log) > { > -- > 2.17.1 >