From mboxrd@z Thu Jan  1 00:00:00 1970
From: Xiaolong Ye
To: Xiaolong Ye, Qi Zhang, John McNamara, Marko Kovacevic
Cc: Karlsson Magnus, Topel Bjorn, dev@dpdk.org
Date: Wed, 15 May 2019 16:38:42 +0800
Message-Id: <20190515083842.15116-4-xiaolong.ye@intel.com>
In-Reply-To: <20190515083842.15116-1-xiaolong.ye@intel.com>
References: <20190515083842.15116-1-xiaolong.ye@intel.com>
Subject: [dpdk-dev] [PATCH v1 3/3] net/af_xdp: add busy poll support

This patch enables busy-poll support for the AF_XDP PMD. With busy poll,
the kernel driver is executed in process context by calling the poll()
syscall.

The main advantage of the busy-poll feature is that all processing runs
on a single core. This eliminates the core-to-core cache transfers that
otherwise occur between the application core and the core handling the
softirq processing.

The drawback of busy poll is that the extra syscall lowers the maximum
achievable throughput. From a per-core perspective, however, performance
is better: normal mode occupies two cores, while busy poll runs on a
single core.

Signed-off-by: Xiaolong Ye
---
 doc/guides/nics/af_xdp.rst          |  1 +
 drivers/net/af_xdp/rte_eth_af_xdp.c | 48 ++++++++++++++++++++++++++---
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index 18defcda3..e42065170 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -29,6 +29,7 @@ The following options can be provided to set up an af_xdp port in DPDK.
 
 * ``iface`` - name of the Kernel interface to attach to (required);
 * ``start_queue`` - starting netdev queue id (optional, default 0);
 * ``queue_count`` - total netdev queue number (optional, default 1);
+* ``busy_poll_size`` - busy poll batch size (optional, default 0);
 * ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
 
 Prerequisites

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 9a4510701..1e46a4ef4 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include <poll.h>
 #include
 #include
 #include
@@ -72,6 +73,7 @@ struct xsk_umem_info {
 	struct rte_ring *buf_ring;
 	const struct rte_memzone *mz;
 	int pmd_zc;
+	int busy_poll;
 };
 
 struct rx_stats {
@@ -114,6 +116,7 @@ struct pmd_internals {
 	int queue_cnt;
 
 	int pmd_zc;
+	int busy_poll_size;
 	struct ether_addr eth_addr;
 	struct xsk_umem_info *umem;
 	struct rte_mempool *mb_pool_share;
@@ -126,12 +129,14 @@ struct pmd_internals {
 #define ETH_AF_XDP_START_QUEUE_ARG	"start_queue"
 #define ETH_AF_XDP_QUEUE_COUNT_ARG	"queue_count"
 #define ETH_AF_XDP_PMD_ZC_ARG		"pmd_zero_copy"
+#define ETH_AF_XDP_BUSY_POLL_SIZE_ARG	"busy_poll_size"
 
 static const char * const valid_arguments[] = {
 	ETH_AF_XDP_IFACE_ARG,
 	ETH_AF_XDP_START_QUEUE_ARG,
 	ETH_AF_XDP_QUEUE_COUNT_ARG,
 	ETH_AF_XDP_PMD_ZC_ARG,
+	ETH_AF_XDP_BUSY_POLL_SIZE_ARG,
 	NULL
 };
 
@@ -191,6 +196,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_ring_cons *rx = &rxq->rx;
 	struct xsk_umem_info *umem = rxq->umem;
 	struct xsk_ring_prod *fq = &umem->fq;
+	struct pollfd pfds[1];
 	uint32_t idx_rx = 0;
 	uint32_t free_thresh = fq->size >> 1;
 	int pmd_zc = umem->pmd_zc;
@@ -199,6 +205,15 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	unsigned long rx_bytes = 0;
 	int rcvd, i;
 
+	if (umem->busy_poll) {
+		memset(pfds, 0, sizeof(pfds));
+		pfds[0].fd = xsk_socket__fd(rxq->xsk);
+		pfds[0].events = POLLIN;
+
+		if (poll(pfds, 1, 0) <= 0)
+			return 0;
+	}
+
 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE);
 
 	if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, nb_pkts) != 0))
@@ -305,12 +320,23 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct pkt_tx_queue *txq = queue;
 	struct xsk_umem_info *umem = txq->pair->umem;
 	struct rte_mbuf *mbuf;
+	struct pollfd pfds[1];
 	int pmd_zc = umem->pmd_zc;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
 	unsigned long tx_bytes = 0;
 	int i;
 	uint32_t idx_tx;
 
+	if (umem->busy_poll) {
+		memset(pfds, 0, sizeof(pfds));
+		pfds[0].fd = xsk_socket__fd(txq->pair->xsk);
+		pfds[0].events = POLLOUT;
+		if (poll(pfds, 1, 0) <= 0)
+			return 0;
+		if (!(pfds[0].revents & POLLOUT))
+			return 0;
+	}
+
 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);
 
 	pull_umem_cq(umem, nb_pkts);
@@ -615,6 +641,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	cfg.rx_size = ring_size;
 	cfg.tx_size = ring_size;
 	cfg.libbpf_flags = 0;
+	cfg.busy_poll = internals->busy_poll_size;
 	cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 	cfg.bind_flags = 0;
 	ret = xsk_socket__create(&rxq->xsk, internals->if_name,
@@ -680,10 +707,14 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 
 	internals->umem = rxq->umem;
 	internals->umem->pmd_zc = internals->pmd_zc;
+	internals->umem->busy_poll = internals->busy_poll_size ? 1 : 0;
 
 	if (internals->umem->pmd_zc)
 		AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
 
+	if (internals->umem->busy_poll)
+		AF_XDP_LOG(INFO, "Busy poll enabled.\n");
+
 	dev->data->rx_queues[rx_queue_id] = rxq;
 
 	return 0;
@@ -818,7 +849,7 @@ parse_name_arg(const char *key __rte_unused,
 
 static int
 parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
-		 int *queue_cnt, int *pmd_zc)
+		 int *queue_cnt, int *pmd_zc, int *busy_poll_size)
 {
 	int ret;
 
@@ -844,6 +875,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
 	if (ret < 0)
 		goto free_kvlist;
 
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_BUSY_POLL_SIZE_ARG,
+				 &parse_integer_arg, busy_poll_size);
+	if (ret < 0)
+		goto free_kvlist;
+
 free_kvlist:
 	rte_kvargs_free(kvlist);
 	return ret;
@@ -881,7 +917,8 @@ get_iface_info(const char *if_name,
 
 static struct rte_eth_dev *
 init_internals(struct rte_vdev_device *dev, const char *if_name,
-	       int start_queue_idx, int queue_cnt, int pmd_zc)
+	       int start_queue_idx, int queue_cnt, int pmd_zc,
+	       int busy_poll_size)
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
@@ -897,6 +934,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	internals->start_queue_idx = start_queue_idx;
 	internals->queue_cnt = queue_cnt;
 	internals->pmd_zc = pmd_zc;
+	internals->busy_poll_size = busy_poll_size;
 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
 
 	for (i = 0; i < queue_cnt; i++) {
@@ -941,6 +979,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	struct rte_eth_dev *eth_dev = NULL;
 	const char *name;
 	int pmd_zc = 0;
+	int busy_poll_size = 0;
 
 	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
 		   rte_vdev_device_name(dev));
@@ -968,7 +1007,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		dev->device.numa_node = rte_socket_id();
 
 	if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
-			     &xsk_queue_cnt, &pmd_zc) < 0) {
+			     &xsk_queue_cnt, &pmd_zc, &busy_poll_size) < 0) {
 		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
 		return -EINVAL;
 	}
@@ -979,7 +1018,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	}
 
 	eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
-				 xsk_queue_cnt, pmd_zc);
+				 xsk_queue_cnt, pmd_zc, busy_poll_size);
 	if (eth_dev == NULL) {
 		AF_XDP_LOG(ERR, "Failed to init internals\n");
 		return -1;
@@ -1023,6 +1062,7 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
 			      "iface= "
 			      "start_queue= "
 			      "queue_count= "
+			      "busy_poll_size= "
 			      "pmd_zero_copy=<0|1>");
 
 RTE_INIT(af_xdp_init_log)
-- 
2.17.1
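
With the patch applied, busy poll is enabled from the devargs like any
other af_xdp option. A minimal testpmd invocation as a sketch; the
interface name, core list, batch size and binary path below are only
illustrative and depend on your setup:

    ./build/app/testpmd -l 0-1 \
        --vdev net_af_xdp,iface=ens786f1,busy_poll_size=32 -- -i

Leaving busy_poll_size at its default of 0 keeps the existing behaviour,
where the RX/TX burst functions never call poll() and softirq processing
runs on a separate core.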
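
The heart of the datapath change is the zero-timeout poll() gate at the
top of the RX and TX burst functions. A standalone sketch of the RX side,
assuming xsk_fd holds the descriptor returned by xsk_socket__fd() for the
queue's socket; the helper name is hypothetical, not driver code:

    #include <poll.h>

    /* Mirrors the busy-poll gate in the RX burst path: returns non-zero
     * when the AF_XDP socket has packets ready to receive. */
    static int xsk_rx_ready(int xsk_fd)
    {
    	struct pollfd pfd = { .fd = xsk_fd, .events = POLLIN };

    	/* A timeout of 0 never blocks the datapath; the syscall itself
    	 * is what runs the kernel driver in process context. */
    	if (poll(&pfd, 1, 0) <= 0)
    		return 0;	/* nothing pending, or poll() failed */

    	return pfd.revents & POLLIN;
    }

The TX gate is symmetric: it polls for POLLOUT and additionally checks
revents before attempting to transmit.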