* [dpdk-dev] [RFC PATCH 1/6] eal: common direct ring access API
2014-11-25 14:11 [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver Cunming Liang
@ 2014-11-25 14:11 ` Cunming Liang
2014-11-25 14:11 ` [dpdk-dev] [RFC PATCH 2/6] eal: direct ring access support by linux af_packet Cunming Liang
` (6 subsequent siblings)
7 siblings, 0 replies; 24+ messages in thread
From: Cunming Liang @ 2014-11-25 14:11 UTC (permalink / raw)
To: dev
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
lib/librte_eal/common/Makefile | 5 +
lib/librte_eal/common/include/rte_pci_bifurc.h | 186 +++++++++++++++++++++++++
2 files changed, 191 insertions(+)
create mode 100644 lib/librte_eal/common/include/rte_pci_bifurc.h
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 499ba4d..6b2e231 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -52,6 +52,11 @@ GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h
ARCH_DIR ?= $(RTE_ARCH)
ARCH_INC := $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h))
+ifeq ($(CONFIG_RTE_LIBRTE_EAL_LINUXAPP),y)
+INC += rte_pci_bifurc.h
+endif
+
+
SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include := $(addprefix include/,$(INC))
SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include += \
$(addprefix include/arch/$(ARCH_DIR)/,$(ARCH_INC))
diff --git a/lib/librte_eal/common/include/rte_pci_bifurc.h b/lib/librte_eal/common/include/rte_pci_bifurc.h
new file mode 100644
index 0000000..ad93124
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_pci_bifurc.h
@@ -0,0 +1,186 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PCI_BIFURC_H_
+#define _RTE_PCI_BIFURC_H_
+
+/**
+ * @file
+ *
+ * RTE PCI BIFURC Interface
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_pci.h>
+#include <rte_devargs.h>
+
+
+/**
+ * Open a socket for direct ring access.
+ * For socket APIs that support direct ring access,
+ * it opens a socket of the corresponding address family.
+ *
+ * @param sockfd
+ * A pointer to the socket file descriptor that receives the opened sockfd.
+ * @return
+ * 0 on success, negative on error.
+ *
+ */
+int rte_eal_bifurc_open(int *sockfd);
+
+/**
+ * Bind a net interface to specific socket for direct ring access.
+ *
+ * @param sockfd
+ * The Socket file descriptor to bind the net interface with.
+ * @param if_index
+ * The net interface used for direct ring access.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_eal_bifurc_bind(int sockfd, int if_index);
+
+/**
+ * Map ring specific register space for direct access.
+ *
+ * @param sockfd
+ * The socket file descriptor to get the ring address mapping from.
+ * @param addr
+ * The pointer to the virtual address got from mapping.
+ * @param size
+ * The pointer to the memory size.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_eal_bifurc_map(int sockfd, void **addr, uint32_t *size);
+
+/**
+ * Unmap ring specific register space for direct access.
+ *
+ * @param sockfd
+ * The socket file descriptor to release the ring address mapping.
+ * @param addr
+ * The unmap virtual address.
+ */
+void rte_eal_bifurc_unmap(int sockfd, void *addr);
+
+/**
+ * Split the ring pairs from the net device.
+ * For net devices that support direct ring access,
+ * this splits off the rings and reserves them exclusively.
+ *
+ * @param sockfd
+ * The socket fd owns the split ring resource.
+ * @param nb_qp
+ * Pointer to the requested number of queue pairs; updated with the number reported back.
+ * @param qp_start
+ * Pointer that receives the id of the first queue pair.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_eal_bifurc_split(int sockfd, uint32_t *nb_qp, uint32_t *qp_start);
+
+/**
+ * Retire the ring pairs to the net device.
+ *
+ * @param sockfd
+ * The socket fd owns the ring resource.
+ * @param nb_qp
+ * Request number of queue pair.
+ * @param qp_start
+ * The first queue pair id.
+ */
+void rte_eal_bifurc_retire(int sockfd, uint32_t nb_qp, uint32_t qp_start);
+
+/**
+ * Utility function to initialize the PCI info in rte_pci_device from the net device.
+ *
+ * @param sockfd
+ * The socket fd stands for the binding net device.
+ * @param pci_dev
+ * The pointer of pci device to hook with the net device.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_eal_bifurc_set_pci(int sockfd, struct rte_pci_device *pci_dev);
+
+/**
+ * Utility function to get net interface info by iface name.
+ *
+ * @param sockfd
+ * The socket fd used to issue the interface queries.
+ * @param iface_name
+ * The string for iface name.
+ * @param if_index
+ * The pointer to the index of such net interface/device.
+ * @param hwaddr
+ * The pointer to the MAC address of such net device.
+ * @param mtu
+ * The pointer to the MTU of such net device.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_eal_bifurc_get_ifinfo(int sockfd, char *iface_name,
+ int *if_index, uint8_t *hwaddr, int *mtu);
+
+/**
+ * Utility function to get/alloc a devargs instance.
+ *
+ * @param drv_name
+ * The string of driver name in devargs.
+ * @param args
+ * The args in devargs.
+ * @return
+ * A pointer to the newly allocated devargs on success, NULL on error.
+ */
+struct rte_devargs *
+rte_eal_bifurc_get_devargs(const char *drv_name, const char *args);
+
+/**
+ * Utility function to free the devargs instance.
+ *
+ * @param devargs
+ * The devargs instance to free.
+ *
+ */
+void rte_eal_bifurc_put_devargs(struct rte_devargs *devargs);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PCI_BIFURC_H_ */
--
1.8.1.4
^ permalink raw reply [flat|nested] 24+ messages in thread
* [dpdk-dev] [RFC PATCH 2/6] eal: direct ring access support by linux af_packet
2014-11-25 14:11 [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver Cunming Liang
2014-11-25 14:11 ` [dpdk-dev] [RFC PATCH 1/6] eal: common direct ring access API Cunming Liang
@ 2014-11-25 14:11 ` Cunming Liang
2014-11-25 14:11 ` [dpdk-dev] [RFC PATCH 3/6] pci: allow VDEV as pci device during device driver probe Cunming Liang
` (5 subsequent siblings)
7 siblings, 0 replies; 24+ messages in thread
From: Cunming Liang @ 2014-11-25 14:11 UTC (permalink / raw)
To: dev
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
lib/librte_eal/linuxapp/eal/Makefile | 1 +
lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c | 336 +++++++++++++++++++++++++++
2 files changed, 337 insertions(+)
create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 06c1dc5..f775203 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -61,6 +61,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_uio.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_vfio.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_vfio_mp_sync.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_bifurc.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_debug.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_lcore.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_timer.c
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c b/lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
new file mode 100644
index 0000000..94ad4df
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
@@ -0,0 +1,336 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+
+#include <rte_malloc.h>
+#include <rte_dev.h>
+#include <rte_pci.h>
+#include <rte_log.h>
+#include <rte_devargs.h>
+#include <rte_pci_bifurc.h>
+
+/*
+ * Query attributes of a net interface by name.
+ *
+ * The name is looked up through ioctl() on sockfd. if_index, hwaddr and
+ * mtu are each optional: a NULL pointer skips the corresponding request.
+ * hwaddr must have room for IFHWADDRLEN bytes.
+ *
+ * Returns 0 on success, -1 if iface_name is NULL or does not fit in
+ * ifr_name, or if any issued ioctl fails.
+ */
+int
+rte_eal_bifurc_get_ifinfo(int sockfd, char *iface_name,
+		int *if_index, uint8_t *hwaddr, int *mtu)
+{
+	struct ifreq req;
+	size_t name_len;
+
+	if (iface_name == NULL)
+		return -1;
+
+	/* the name plus its terminating NUL has to fit in ifr_name */
+	name_len = strlen(iface_name);
+	if (name_len >= IFNAMSIZ) {
+		RTE_LOG(ERR, EAL, "I/F name too long (%s)\n", iface_name);
+		return -1;
+	}
+
+	/*
+	 * Set the name once for all requests below, so that hwaddr/mtu can
+	 * be queried even when if_index is not requested. Only the string
+	 * itself is copied; the memset provides the terminator and padding.
+	 */
+	memset(&req, 0, sizeof(req));
+	memcpy(req.ifr_name, iface_name, name_len);
+
+	/* request for ifindex */
+	if (if_index != NULL) {
+		if (ioctl(sockfd, SIOCGIFINDEX, &req) == -1) {
+			RTE_LOG(ERR, EAL,
+				"%s: ioctl failed (SIOCGIFINDEX)\n",
+				iface_name);
+			return -1;
+		}
+		*if_index = req.ifr_ifindex;
+	}
+
+	/* request for hwaddr */
+	if (hwaddr != NULL) {
+		if (ioctl(sockfd, SIOCGIFHWADDR, &req) == -1) {
+			RTE_LOG(ERR, EAL,
+				"%s: ioctl failed (SIOCGIFHWADDR)\n",
+				iface_name);
+			return -1;
+		}
+		memcpy(hwaddr, req.ifr_hwaddr.sa_data, IFHWADDRLEN);
+	}
+
+	/* request for mtu */
+	if (mtu != NULL) {
+		if (ioctl(sockfd, SIOCGIFMTU, &req) == -1) {
+			RTE_LOG(ERR, EAL,
+				"%s: ioctl failed (SIOCGIFMTU)\n",
+				iface_name);
+			return -1;
+		}
+		*mtu = req.ifr_mtu;
+	}
+
+	return 0;
+}
+
+/*
+ * Ask the AF_PACKET socket for the size of the mappable device region.
+ *
+ * On success stores the reported tp_dev_bar_sz in *size and returns 0;
+ * returns -1 if the getsockopt() call fails.
+ */
+static int
+get_map_size(int sockfd, uint32_t *size)
+{
+	struct tpacket_dev_qpair_map_region_info info;
+	socklen_t optlen = sizeof(info);
+
+	/* request for map region info */
+	if (getsockopt(sockfd, SOL_PACKET, PACKET_DEV_QPAIR_MAP_REGION_INFO,
+			&info, &optlen) == -1) {
+		/* logged with the EAL type, like the other messages in this file */
+		RTE_LOG(ERR, EAL,
+			"could not get PACKET_DEV_QPAIR_MAP_REGION_INFO "
+			"on AF_PACKET socket, errno = %d\n", errno);
+		return -1;
+	}
+
+	*size = info.tp_dev_bar_sz;
+	return 0;
+}
+
+/*
+ * Map the device region exposed by the AF_PACKET socket.
+ *
+ * The region size is queried from the socket and stored in *size, then
+ * that many bytes are mapped read/write and shared at offset 0 of sockfd.
+ * On success *addr holds the mapping and 0 is returned. On failure -1 is
+ * returned and, if the mmap() itself failed, *addr is reset to NULL.
+ */
+int
+rte_eal_bifurc_map(int sockfd, void **addr, uint32_t *size)
+{
+	void *va;
+
+	if (addr == NULL || size == NULL)
+		return -1;
+
+	if (get_map_size(sockfd, size))
+		return -1;
+
+	va = mmap(NULL, *size, PROT_READ | PROT_WRITE,
+			MAP_SHARED, sockfd, 0);
+	if (va == MAP_FAILED) {
+		RTE_LOG(ERR, EAL,
+			"call to mmap failed on AF_PACKET socket %d\n",
+			sockfd);
+		/*
+		 * Never leave MAP_FAILED behind: it is non-NULL, so a later
+		 * unmap would mistake it for a live mapping.
+		 */
+		*addr = NULL;
+		return -1;
+	}
+	*addr = va;
+
+	RTE_LOG(INFO, EAL,
+		"mapping sockfd %d PCIE configuraiton space,"
+		"address = %p, size = 0x%x\n", sockfd,
+		*addr, *size);
+
+	return 0;
+}
+
+/*
+ * Release a mapping of the device region of sockfd.
+ *
+ * A NULL addr is ignored. The mapping length is not passed in, so it is
+ * queried from the socket again; if that query fails nothing is unmapped.
+ */
+void
+rte_eal_bifurc_unmap(int sockfd, void *addr)
+{
+	uint32_t map_len;
+
+	/* nothing was mapped */
+	if (addr == NULL)
+		return;
+
+	/* without a known length the region cannot be released */
+	if (get_map_size(sockfd, &map_len) != 0)
+		return;
+
+	munmap(addr, map_len);
+}
+
+/*
+ * Split queue pairs off the net device bound to sockfd.
+ *
+ * *nb_qp holds the requested number of queue pairs on entry. On success
+ * *nb_qp and *qp_start are overwritten with the count and the first id
+ * read back from the socket. Returns 0 on success, -1 on NULL arguments
+ * or when either socket option call fails.
+ */
+int
+rte_eal_bifurc_split(int sockfd, uint32_t *nb_qp, uint32_t *qp_start)
+{
+	struct tpacket_dev_qpairs_info qpairs_info;
+	socklen_t optlen = sizeof(qpairs_info);
+	int ret;
+
+	if (nb_qp == NULL || qp_start == NULL)
+		return -1;
+
+	/* do not hand uninitialized stack bytes to the kernel */
+	memset(&qpairs_info, 0, sizeof(qpairs_info));
+
+	/*
+	 * request for qpairs split
+	 * NOTE(review): -1 presumably asks the kernel to pick the first
+	 * queue pair itself - confirm against the kernel side.
+	 */
+	qpairs_info.tp_qpairs_start_from = -1;
+	qpairs_info.tp_qpairs_num = *nb_qp;
+	ret = setsockopt(sockfd, SOL_PACKET, PACKET_RXTX_QPAIRS_SPLIT,
+			&qpairs_info, optlen);
+	if (ret == -1) {
+		RTE_LOG(ERR, EAL,
+			"request PACKET_RXTX_QPAIRS_SPLIT on AF_PACKET "
+			"socket for %d fail, errno = %d\n",
+			sockfd, errno);
+		return -1;
+	}
+
+	/* parse response of qpairs split */
+	ret = getsockopt(sockfd, SOL_PACKET, PACKET_RXTX_QPAIRS_SPLIT,
+			&qpairs_info, &optlen);
+	if (ret == -1) {
+		RTE_LOG(ERR, EAL,
+			"could not get PACKET_RXTX_QPAIRS_SPLIT on AF_PACKET "
+			"socket for %d, errno = %d\n", sockfd, errno);
+		return -1;
+	}
+
+	*nb_qp = qpairs_info.tp_qpairs_num;
+	*qp_start = qpairs_info.tp_qpairs_start_from;
+
+	RTE_LOG(INFO, EAL,
+		"kernel driver allocates queue pairs from %u to %u\n",
+		qpairs_info.tp_qpairs_start_from,
+		qpairs_info.tp_qpairs_start_from +
+		qpairs_info.tp_qpairs_num - 1);
+
+	return 0;
+}
+
+/*
+ * Hand queue pairs back to the net device bound to sockfd.
+ *
+ * nb_qp queue pairs starting at qp_start are returned through the
+ * PACKET_RXTX_QPAIRS_RETURN socket option. A failure is only logged;
+ * nothing is reported to the caller.
+ */
+void
+rte_eal_bifurc_retire(int sockfd, uint32_t nb_qp, uint32_t qp_start)
+{
+	struct tpacket_dev_qpairs_info qpairs_info;
+	int ret;
+
+	/* do not hand uninitialized stack bytes to the kernel */
+	memset(&qpairs_info, 0, sizeof(qpairs_info));
+
+	/* return queues to kernel driver */
+	qpairs_info.tp_qpairs_start_from = qp_start;
+	qpairs_info.tp_qpairs_num = nb_qp;
+	ret = setsockopt(sockfd, SOL_PACKET, PACKET_RXTX_QPAIRS_RETURN,
+			&qpairs_info, sizeof(qpairs_info));
+	if (ret == -1)
+		/* the queue ids are uint32_t, so they are printed with %u */
+		RTE_LOG(ERR, EAL,
+			"could not set PACKET_RXTX_QPAIRS_RETURN on AF_PACKET "
+			"socket %d for queue pairs from %u to %u\n",
+			sockfd, qp_start, qp_start + nb_qp - 1);
+}
+
+/*
+ * Open a raw AF_PACKET socket for all protocols (ETH_P_ALL).
+ *
+ * The descriptor, or -1 if socket() failed, is stored in *sockfd.
+ * Returns 0 on success, -1 when sockfd is NULL or the socket could not
+ * be created.
+ */
+int
+rte_eal_bifurc_open(int *sockfd)
+{
+	int fd = -1;
+
+	/* only touch the output when the caller supplied one */
+	if (sockfd != NULL) {
+		fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+		*sockfd = fd;
+	}
+
+	/* covers both the NULL argument and a failed socket() */
+	if (fd == -1) {
+		RTE_LOG(ERR, EAL, "Could not open AF_PACKET socket\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Bind an AF_PACKET socket to the net interface with index if_index,
+ * for all protocols (ETH_P_ALL).
+ *
+ * Returns 0 on success, -1 when bind() fails.
+ */
+int
+rte_eal_bifurc_bind(int sockfd, int if_index)
+{
+	struct sockaddr_ll sll;
+
+	/* af_packet bind iface with sockfd */
+	memset(&sll, 0, sizeof(sll));
+	sll.sll_ifindex = if_index;
+	sll.sll_protocol = htons(ETH_P_ALL);
+	sll.sll_family = AF_PACKET;
+
+	if (bind(sockfd, (const struct sockaddr *)&sll, sizeof(sll)) == -1) {
+		RTE_LOG(ERR, EAL, "could not bind AF_PACKET socket to %d\n",
+			if_index);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Initialize *pci_dev from the device description of the net device
+ * bound to sockfd.
+ *
+ * The description is read with the PACKET_DEV_DESC_INFO socket option.
+ * *pci_dev is cleared entirely, then vendor id, device id and numa node
+ * are copied in and both subsystem ids are set to PCI_ANY_ID.
+ *
+ * Returns 0 on success, -1 when pci_dev is NULL or getsockopt() fails
+ * (in which case *pci_dev is left untouched).
+ */
+int
+rte_eal_bifurc_set_pci(int sockfd, struct rte_pci_device *pci_dev)
+{
+	struct tpacket_dev_info dev_info;
+	socklen_t optlen = sizeof(dev_info);
+	int ret;
+
+	if (pci_dev == NULL)
+		return -1;
+
+	/* request for desc info */
+	ret = getsockopt(sockfd, SOL_PACKET, PACKET_DEV_DESC_INFO,
+			&dev_info, &optlen);
+	if (ret == -1) {
+		RTE_LOG(ERR, EAL,
+			"could not get PACKET_DEV_DESC_INFO on AF_PACKET "
+			"socket for %d, errno = %d\n", sockfd, errno);
+		return -1;
+	}
+
+	RTE_LOG(INFO, EAL,
+		"vendorid = 0x%x, deviceid = 0x%x, "
+		"num of qpairs = %d, insue qpairs = %d\n",
+		dev_info.tp_vendor_id, dev_info.tp_device_id,
+		dev_info.tp_num_total_qpairs, dev_info.tp_num_inuse_qpairs);
+
+	/* start from a clean pci_dev, then fill in what the kernel reported */
+	memset(pci_dev, 0, sizeof(*pci_dev));
+	pci_dev->id.vendor_id = dev_info.tp_vendor_id;
+	pci_dev->id.device_id = dev_info.tp_device_id;
+	pci_dev->id.subsystem_vendor_id = PCI_ANY_ID;
+	pci_dev->id.subsystem_device_id = PCI_ANY_ID;
+	pci_dev->numa_node = dev_info.tp_numa_node;
+
+	return 0;
+}
+
+/*
+ * Allocate a devargs instance of type RTE_DEVTYPE_VIRTUAL.
+ *
+ * drv_name and args are copied into the new instance. A NULL args is
+ * stored as an empty string. Returns the new instance, or NULL when
+ * drv_name is NULL, allocation fails, or either string does not fit.
+ * The result is released with rte_eal_bifurc_put_devargs().
+ */
+struct rte_devargs *
+rte_eal_bifurc_get_devargs(const char *drv_name, const char *args)
+{
+	struct rte_devargs *devargs;
+	int ret;
+
+	/* passing NULL to a "%s" conversion is undefined behavior */
+	if (drv_name == NULL) {
+		RTE_LOG(ERR, EAL, "missing driver name for devargs\n");
+		return NULL;
+	}
+	if (args == NULL)
+		args = "";
+
+	devargs = calloc(1, sizeof(*devargs));
+	if (devargs == NULL) {
+		RTE_LOG(ERR, EAL, "cannot allocate devargs\n");
+		return NULL;
+	}
+	devargs->type = RTE_DEVTYPE_VIRTUAL;
+
+	ret = snprintf(devargs->virtual.drv_name,
+			sizeof(devargs->virtual.drv_name), "%s", drv_name);
+	if (ret < 0 || ret >= (int)sizeof(devargs->virtual.drv_name)) {
+		RTE_LOG(ERR, EAL,
+			"driver name too large: <%s>\n", drv_name);
+		free(devargs);
+		return NULL;
+	}
+
+	ret = snprintf(devargs->args, sizeof(devargs->args), "%s", args);
+	if (ret < 0 || ret >= (int)sizeof(devargs->args)) {
+		RTE_LOG(ERR, EAL,
+			"driver args too large: <%s>\n", args);
+		free(devargs);
+		return NULL;
+	}
+
+	return devargs;
+}
+
+/*
+ * Release a devargs instance; a NULL pointer is accepted and ignored.
+ */
+void
+rte_eal_bifurc_put_devargs(struct rte_devargs *devargs)
+{
+	/* free() is a no-op on NULL, so no guard is needed */
+	free(devargs);
+}
--
1.8.1.4
^ permalink raw reply [flat|nested] 24+ messages in thread
* [dpdk-dev] [RFC PATCH 3/6] pci: allow VDEV as pci device during device driver probe
2014-11-25 14:11 [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver Cunming Liang
2014-11-25 14:11 ` [dpdk-dev] [RFC PATCH 1/6] eal: common direct ring access API Cunming Liang
2014-11-25 14:11 ` [dpdk-dev] [RFC PATCH 2/6] eal: direct ring access support by linux af_packet Cunming Liang
@ 2014-11-25 14:11 ` Cunming Liang
2014-11-25 14:11 ` [dpdk-dev] [RFC PATCH 4/6] bifurc: add driver to scan bifurcated netdev Cunming Liang
` (4 subsequent siblings)
7 siblings, 0 replies; 24+ messages in thread
From: Cunming Liang @ 2014-11-25 14:11 UTC (permalink / raw)
To: dev
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
lib/librte_eal/common/include/rte_pci.h | 4 ++++
lib/librte_eal/linuxapp/eal/eal_pci.c | 42 +++++++++++++++++++++------------
lib/librte_ether/rte_ethdev.c | 3 +--
3 files changed, 32 insertions(+), 17 deletions(-)
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 66ed793..e205330 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -139,11 +139,13 @@ struct rte_pci_addr {
struct rte_devargs;
+#define RTE_PCI_DEV_NAME_SIZE (32)
/**
* A structure describing a PCI device.
*/
struct rte_pci_device {
TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */
+ char name[RTE_PCI_DEV_NAME_SIZE]; /**< PCI device name. */
struct rte_pci_addr addr; /**< PCI location. */
struct rte_pci_id id; /**< PCI ID. */
struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */
@@ -199,6 +201,8 @@ struct rte_pci_driver {
#define RTE_PCI_DRV_FORCE_UNBIND 0x0004
/** Device driver supports link state interrupt */
#define RTE_PCI_DRV_INTR_LSC 0x0008
+/** Device driver supports bifurcated queue pair mapping */
+#define RTE_PCI_DRV_BIFURC 0x0010
/**< Internal use only - Macro used by pci addr parsing functions **/
#define GET_PCIADDR_FIELD(in, fd, lim, dlm) \
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index ddb0535..8a97906 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -284,6 +284,10 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
return -1;
}
+ /* record pci address as device name */
+ snprintf(dev->name, RTE_PCI_DEV_NAME_SIZE, "%d:%d.%d",
+ bus, devid, function);
+
/* device is valid, add in list (sorted) */
if (TAILQ_EMPTY(&pci_device_list)) {
TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
@@ -549,23 +553,31 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d
return 1;
}
- if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
+ if ((dev->devargs != NULL) &&
+ (dev->devargs->type == RTE_DEVTYPE_VIRTUAL)) {
+ if (!(dr->drv_flags & RTE_PCI_DRV_BIFURC))
+ return 1;
+ } else {
+ if (dr->drv_flags & RTE_PCI_DRV_BIFURC)
+ return 1;
+ else if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
#ifdef RTE_PCI_CONFIG
- /*
- * Set PCIe config space for high performance.
- * Return value can be ignored.
- */
- pci_config_space_set(dev);
+ /*
+ * Set PCIe config space for high performance.
+ * Return value can be ignored.
+ */
+ pci_config_space_set(dev);
#endif
- /* map resources for devices that use igb_uio */
- ret = pci_map_device(dev);
- if (ret != 0)
- return ret;
- } else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND &&
- rte_eal_process_type() == RTE_PROC_PRIMARY) {
- /* unbind current driver */
- if (pci_unbind_kernel_driver(dev) < 0)
- return -1;
+ /* map resources for devices that use igb_uio */
+ ret = pci_map_device(dev);
+ if (ret != 0)
+ return ret;
+ } else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* unbind current driver */
+ if (pci_unbind_kernel_driver(dev) < 0)
+ return -1;
+ }
}
/* reference driver structure */
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 3e2b5d8..76be736 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -246,8 +246,7 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
eth_drv = (struct eth_driver *)pci_drv;
/* Create unique Ethernet device name using PCI address */
- snprintf(ethdev_name, RTE_ETH_NAME_MAX_LEN, "%d:%d.%d",
- pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
+ snprintf(ethdev_name, RTE_ETH_NAME_MAX_LEN, "%s", pci_dev->name);
eth_dev = rte_eth_dev_allocate(ethdev_name);
if (eth_dev == NULL)
--
1.8.1.4
^ permalink raw reply [flat|nested] 24+ messages in thread
* [dpdk-dev] [RFC PATCH 4/6] bifurc: add driver to scan bifurcated netdev
2014-11-25 14:11 [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver Cunming Liang
` (2 preceding siblings ...)
2014-11-25 14:11 ` [dpdk-dev] [RFC PATCH 3/6] pci: allow VDEV as pci device during device driver probe Cunming Liang
@ 2014-11-25 14:11 ` Cunming Liang
2014-11-25 14:11 ` [dpdk-dev] [RFC PATCH 5/6] ixgbe: rx/tx queue stop bug fix Cunming Liang
` (3 subsequent siblings)
7 siblings, 0 replies; 24+ messages in thread
From: Cunming Liang @ 2014-11-25 14:11 UTC (permalink / raw)
To: dev
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
config/common_linuxapp | 5 +
lib/Makefile | 1 +
lib/librte_bifurc/Makefile | 58 +++++++++
lib/librte_bifurc/rte_bifurc.c | 284 +++++++++++++++++++++++++++++++++++++++++
lib/librte_bifurc/rte_bifurc.h | 90 +++++++++++++
mk/rte.app.mk | 6 +
6 files changed, 444 insertions(+)
create mode 100644 lib/librte_bifurc/Makefile
create mode 100644 lib/librte_bifurc/rte_bifurc.c
create mode 100644 lib/librte_bifurc/rte_bifurc.h
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 86a0d15..72fe0b1 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -163,6 +163,11 @@ CONFIG_RTE_LIBRTE_IEEE1588=n
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
#
+# Compile bifurcate driver backed by AF_PACKET sockets (Linux only)
+#
+CONFIG_RTE_LIBRTE_BIFURC=y
+
+#
# Support NIC bypass logic
#
CONFIG_RTE_NIC_BYPASS=n
diff --git a/lib/Makefile b/lib/Makefile
index 204ef11..c59ae5b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -69,6 +69,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += librte_pipeline
ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += librte_ivshmem
+DIRS-$(CONFIG_RTE_LIBRTE_BIFURC) += librte_bifurc
endif
include $(RTE_SDK)/mk/rte.sharelib.mk
diff --git a/lib/librte_bifurc/Makefile b/lib/librte_bifurc/Makefile
new file mode 100644
index 0000000..c5c1894
--- /dev/null
+++ b/lib/librte_bifurc/Makefile
@@ -0,0 +1,58 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_bifurc.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_BIFURC) += rte_bifurc.c
+
+#
+# Export include files
+#
+SYMLINK-$(CONFIG_RTE_LIBRTE_BIFURC)-include += rte_bifurc.h
+
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_BIFURC) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_BIFURC) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_BIFURC) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_bifurc/rte_bifurc.c b/lib/librte_bifurc/rte_bifurc.c
new file mode 100644
index 0000000..8cb29e3
--- /dev/null
+++ b/lib/librte_bifurc/rte_bifurc.c
@@ -0,0 +1,284 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <errno.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+#include <rte_dev.h>
+#include <rte_pci.h>
+#include <rte_ethdev.h>
+#include <rte_kvargs.h>
+#include <rte_pci_bifurc.h>
+#include "rte_bifurc.h"
+
+/*
+ * Fill dev_info with the properties of the bifurcated device behind dev.
+ *
+ * Interface index, MTU (as max rx packet length) and queue count (for
+ * both rx and tx) come from the bifurc_device; one MAC address is
+ * reported, the minimum rx buffer size is 0 and pci_dev is set to NULL.
+ */
+void
+rte_bifurc_ethdev_get_info(struct rte_eth_dev *dev,
+		struct rte_eth_dev_info *dev_info)
+{
+	/*
+	 * NOTE(review): this cast assumes the rte_pci_device is embedded at
+	 * the start of struct bifurc_device - confirm against its definition.
+	 */
+	struct bifurc_device *bif = (struct bifurc_device *)dev->pci_dev;
+	uint32_t nb_queues = (uint32_t)bif->nb_queues;
+
+	dev_info->pci_dev = NULL;
+	dev_info->if_index = bif->if_index;
+	dev_info->max_mac_addrs = 1;
+	dev_info->min_rx_bufsize = 0;
+	dev_info->max_rx_pktlen = (uint32_t)bif->mtu;
+	dev_info->max_rx_queues = nb_queues;
+	dev_info->max_tx_queues = nb_queues;
+}
+
+/* NULL-terminated list of the argument keys accepted when parsing the vdev parameters. */
+static const char *valid_arguments[] = {
+ RTE_BIFURC_IFACE_ARG,
+ RTE_BIFURC_NUM_QPAIRS_ARG,
+ NULL,
+};
+
+/*
+ * kvargs handler: copy the interface name given in iface to extra_args.
+ *
+ * extra_args must point to a buffer of at least IFNAMSIZ bytes. Returns
+ * 0 on success, -1 when the name (plus terminator) does not fit.
+ */
+static int
+bif_get_ifname(const char *key __rte_unused,
+		const char *iface,
+		void *extra_args)
+{
+	char *ifname = (char *)extra_args;
+	/* size_t keeps the full strlen result; a narrower type could wrap */
+	size_t ifnamelen = strlen(iface);
+
+	if (ifnamelen >= IFNAMSIZ) {
+		RTE_LOG(ERR, EAL,
+			"I/F name too long (%s)\n", iface);
+		return -1;
+	}
+
+	/* copy the string and terminate it explicitly */
+	memcpy(ifname, iface, ifnamelen);
+	ifname[ifnamelen] = '\0';
+
+	return 0;
+}
+
+/*
+ * kvargs handler: parse the requested number of queue pairs.
+ *
+ * value must be a decimal integer in [1, RTE_BIFURC_PMD_MAX_QPAIRS] with
+ * no trailing characters. On success the number is stored through
+ * extra_args (a uint32_t pointer) and 0 is returned; otherwise -1.
+ */
+static int
+bif_get_qp(const char *key __rte_unused,
+		const char *value,
+		void *extra_args)
+{
+	uint32_t *nb_qp = (uint32_t *)extra_args;
+	char *end = NULL;
+	long qpairs = 0;
+
+	/* strtol, unlike atoi, reports overflow and non-numeric input */
+	if (value != NULL) {
+		errno = 0;
+		qpairs = strtol(value, &end, 10);
+	}
+
+	if (value == NULL || errno != 0 || end == value || *end != '\0' ||
+			qpairs < 1 || qpairs > RTE_BIFURC_PMD_MAX_QPAIRS) {
+		RTE_LOG(ERR, EAL, "invalid qpairs value\n");
+		return -1;
+	}
+
+	*nb_qp = (uint32_t)qpairs;
+
+	return 0;
+}
+
+/*
+ * Allocate a bifurc_device for the net interface named iface.
+ *
+ * Opens an AF_PACKET socket, looks up index, MAC address and MTU of the
+ * interface, binds the socket to it, fills the embedded pci_dev from the
+ * socket, names it after the interface, attaches devargs and appends it
+ * to pci_device_list.
+ *
+ * Returns the new device, or NULL on any failure; in that case the
+ * socket is closed and the device memory released. devargs is never
+ * freed here.
+ */
+static struct bifurc_device *
+rte_bifurc_alloc_dev(char *iface, struct rte_devargs *devargs)
+{
+	struct bifurc_device *dev = NULL;
+	size_t ifnamelen;
+	int sockfd = -1;
+
+	if (iface == NULL || devargs == NULL)
+		goto error;
+
+	/* validate the name before acquiring any resource */
+	ifnamelen = strlen(iface);
+	if (ifnamelen >= IFNAMSIZ) {
+		RTE_LOG(ERR, EAL,
+			"I/F name too long (%s)\n", iface);
+		goto error;
+	}
+
+	if (rte_eal_bifurc_open(&sockfd))
+		goto error;
+
+	/* alloc dev and bind to af_packet socket */
+	dev = calloc(1, sizeof(*dev));
+	if (dev == NULL)
+		goto error;
+
+	memcpy(dev->iface_name, iface, ifnamelen);
+	dev->iface_name[ifnamelen] = '\0';
+
+	if (rte_eal_bifurc_get_ifinfo(sockfd, dev->iface_name, &dev->if_index,
+			dev->hwaddr, &dev->mtu) != 0)
+		goto error;
+
+	if (rte_eal_bifurc_bind(sockfd, dev->if_index) != 0)
+		goto error;
+
+	/* update dev after bind success */
+	dev->fd = sockfd;
+
+	if (rte_eal_bifurc_set_pci(sockfd, &dev->pci_dev) != 0)
+		goto error;
+
+	/* use iface name as pci_dev name */
+	snprintf(dev->pci_dev.name, RTE_PCI_DEV_NAME_SIZE, "%s",
+		dev->iface_name);
+
+	dev->pci_dev.devargs = devargs;
+
+	TAILQ_INSERT_TAIL(&pci_device_list, &dev->pci_dev, next);
+
+	return dev;
+
+error:
+	if (sockfd >= 0)
+		close(sockfd);
+	free(dev);
+
+	return NULL;
+}
+
+/*
+ * Tear down a device created by rte_bifurc_alloc_dev().
+ *
+ * Unlinks the device from pci_device_list, unmaps the device memory,
+ * returns the queue pairs to the kernel driver, frees the attached
+ * devargs, closes the socket and frees the device. NULL is ignored.
+ */
+static void
+rte_bifurc_free_dev(struct bifurc_device *dev)
+{
+	struct rte_pci_device *pci_dev = NULL;
+
+	if (!dev)
+		return;
+
+	pci_dev = &dev->pci_dev;
+
+	/*
+	 * The device was linked into pci_device_list when it was allocated;
+	 * unlink it first so the list never points at freed memory.
+	 */
+	TAILQ_REMOVE(&pci_device_list, pci_dev, next);
+
+	/* unmap the mapped device memory */
+	rte_eal_bifurc_unmap(dev->fd, pci_dev->mem_resource[0].addr);
+
+	/* return queues to kernel driver */
+	rte_eal_bifurc_retire(dev->fd, dev->nb_queues, dev->qp_lower);
+
+	/* free the rest; the device owns its devargs */
+	free(pci_dev->devargs);
+
+	close(dev->fd);
+	free(dev);
+}
+
+/*
+ * Release the bifurcated device that backs an ethdev.
+ */
+void
+rte_bifurc_ethdev_free(struct rte_eth_dev *dev)
+{
+	/*
+	 * NOTE(review): the cast assumes dev->pci_dev points at the pci_dev
+	 * embedded at the start of a struct bifurc_device - confirm.
+	 */
+	struct bifurc_device *bif = (struct bifurc_device *)dev->pci_dev;
+
+	rte_bifurc_free_dev(bif);
+}
+
+/*
+ * vdev init hook: create a bifurcated device from the vdev parameters.
+ *
+ * Parses the "iface" and "qpairs" arguments from params, allocates the
+ * device for that interface, splits the requested queue pairs off the
+ * kernel driver and maps the device memory into mem_resource[0].
+ *
+ * Returns 0 on success and non-zero on failure; on failure everything
+ * acquired so far is released again.
+ */
+static int
+rte_bifurc_dev_init(const char *name, const char *params)
+{
+	struct rte_devargs *devargs = NULL;
+	struct rte_kvargs *kvlist;
+	struct bifurc_device *dev = NULL;
+	/*
+	 * NOTE(review): default used when "qpairs" is not given; the value
+	 * was previously left uninitialized - confirm the intended default.
+	 */
+	uint32_t nb_qp = RTE_BIFURC_PMD_MAX_QPAIRS;
+	uint32_t map_len = 0;
+	char iface[IFNAMSIZ] = "";
+	int ret = 0;
+
+	RTE_LOG(INFO, PMD, "Initializing %s vdev\n", name);
+
+	kvlist = rte_kvargs_parse(params, valid_arguments);
+	if (kvlist == NULL)
+		return -1;
+
+	/* prepare dev info from kvlist */
+	if (rte_kvargs_count(kvlist, RTE_BIFURC_NUM_QPAIRS_ARG))
+		ret |= rte_kvargs_process(kvlist,
+				RTE_BIFURC_NUM_QPAIRS_ARG,
+				&bif_get_qp, &nb_qp);
+
+	if (rte_kvargs_count(kvlist, RTE_BIFURC_IFACE_ARG))
+		ret |= rte_kvargs_process(kvlist, RTE_BIFURC_IFACE_ARG,
+				&bif_get_ifname, iface);
+	if (ret)
+		goto error;
+
+	/* without an interface name there is nothing to attach to */
+	if (iface[0] == '\0') {
+		RTE_LOG(ERR, PMD, "missing %s argument\n",
+			RTE_BIFURC_IFACE_ARG);
+		ret = -1;
+		goto error;
+	}
+
+	devargs = rte_eal_bifurc_get_devargs(name, params);
+	if (devargs == NULL) {
+		ret = -1;
+		goto error;
+	}
+
+	dev = rte_bifurc_alloc_dev(iface, devargs);
+	if (dev == NULL) {
+		RTE_LOG(ERR, PMD, "no dev attach\n");
+		ret = -1;
+		goto error;
+	}
+
+	/*
+	 * From here on the device owns devargs and rte_bifurc_free_dev()
+	 * releases it; drop the local reference to avoid a double free.
+	 */
+	devargs = NULL;
+
+	dev->nb_queues = nb_qp;
+	ret = rte_eal_bifurc_split(dev->fd, &dev->nb_queues, &dev->qp_lower);
+	if (ret != 0)
+		goto error;
+
+	/*
+	 * Map through a local of the exact type expected by the callee
+	 * instead of storing 32 bits into the len field through a cast.
+	 */
+	if (rte_eal_bifurc_map(dev->fd,
+			&(dev->pci_dev.mem_resource[0].addr),
+			&map_len) != 0) {
+		/* make sure the cleanup below does not unmap a bogus address */
+		dev->pci_dev.mem_resource[0].addr = NULL;
+		ret = -1;
+		goto error;
+	}
+	dev->pci_dev.mem_resource[0].len = map_len;
+
+	/* the parsed arguments have been copied; the list is done with */
+	rte_kvargs_free(kvlist);
+	return 0;
+
+error:
+	/* only non-NULL while no device has taken ownership of it */
+	if (devargs)
+		rte_eal_bifurc_put_devargs(devargs);
+
+	if (dev)
+		rte_bifurc_free_dev(dev);
+
+	rte_kvargs_free(kvlist);
+	return ret;
+}
+
+/*
+ * Driver descriptor for the bifurcated vdev: registered under
+ * RTE_BIFURC_DRV_NAME as a PMD_VDEV with rte_bifurc_dev_init as its
+ * init hook.
+ */
+static struct rte_driver bifurc_bus_drv = {
+ .name = RTE_BIFURC_DRV_NAME,
+ .type = PMD_VDEV,
+ .init = rte_bifurc_dev_init,
+};
+
+PMD_REGISTER_DRIVER(bifurc_bus_drv);
diff --git a/lib/librte_bifurc/rte_bifurc.h b/lib/librte_bifurc/rte_bifurc.h
new file mode 100644
index 0000000..c0951a5
--- /dev/null
+++ b/lib/librte_bifurc/rte_bifurc.h
@@ -0,0 +1,90 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_BIFURC_H_
+#define _RTE_BIFURC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_pci_bifurc.h>
+#include <rte_ethdev.h>
+
+/* Name under which the bifurc vdev driver is registered. */
+#define RTE_BIFURC_DRV_NAME "rte_bifurc"
+/* kvargs key carrying the network interface name. */
+#define RTE_BIFURC_IFACE_ARG "iface"
+/* kvargs key carrying the requested number of queue pairs. */
+#define RTE_BIFURC_NUM_QPAIRS_ARG "qpairs"
+/* NOTE(review): presumably an upper bound on queue pairs; no user is visible here. */
+#define RTE_BIFURC_PMD_MAX_QPAIRS 2
+
+/* Size in bytes of bifurc_device.iface_name. */
+#define RTE_BIFURC_IFNAMESIZ 16
+/* Size in bytes of bifurc_device.hwaddr. */
+#define RTE_BIFURC_HWADDR_LEN 6
+
+/*
+ * Per-interface state of a bifurcated device.
+ *
+ * The helpers in this header cast rte_eth_dev.pci_dev to
+ * struct bifurc_device *, which only works while pci_dev is the first member.
+ */
+struct bifurc_device {
+ /* Embedded PCI device; must remain the first member (see above). */
+ struct rte_pci_device pci_dev;
+ /* Interface name, at most RTE_BIFURC_IFNAMESIZ bytes. */
+ char iface_name[RTE_BIFURC_IFNAMESIZ];
+ /* Descriptor passed to rte_eal_bifurc_split() and rte_eal_bifurc_map(). */
+ int fd;
+ int if_index;
+ /* Hardware address, copied out by rte_bifurc_mac_addr(). */
+ uint8_t hwaddr[RTE_BIFURC_HWADDR_LEN];
+ int mtu;
+ /* Queue base returned by rte_bifurc_qp_base(); filled by the split call. */
+ unsigned qp_lower;
+ /* Queue count; passed by address to the split call. */
+ unsigned nb_queues;
+};
+
+/*
+ * Fill dev_info for the bifurcated device behind dev.
+ * NOTE(review): the definition is not part of this header; confirm which
+ * fields it populates.
+ */
+void
+rte_bifurc_ethdev_get_info(struct rte_eth_dev *dev,
+ struct rte_eth_dev_info *dev_info);
+
+/* Free the struct bifurc_device that dev->pci_dev points at. */
+void
+rte_bifurc_ethdev_free(struct rte_eth_dev *dev);
+
+/*
+ * Return the qp_lower field of the bifurc_device that dev->pci_dev points at.
+ */
+static inline uint32_t
+rte_bifurc_qp_base(struct rte_eth_dev *dev)
+{
+	return ((struct bifurc_device *)dev->pci_dev)->qp_lower;
+}
+
+/*
+ * Copy the hardware address stored in the bifurc_device behind dev->pci_dev
+ * into *mac_addr.
+ */
+static inline void
+rte_bifurc_mac_addr(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+	struct bifurc_device *bdev = (struct bifurc_device *)dev->pci_dev;
+	struct ether_addr *hw_mac = (struct ether_addr *)bdev->hwaddr;
+
+	ether_addr_copy(hw_mac, mac_addr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BIFURC_H_ */
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 59468b0..ea074ab 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -219,6 +219,12 @@ ifeq ($(CONFIG_RTE_LIBRTE_PMD_AF_PACKET),y)
LDLIBS += -lrte_pmd_af_packet
endif
+ifeq ($(CONFIG_RTE_LIBRTE_BIFURC),y)
+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
+LDLIBS += -lrte_bifurc
+endif
+endif
+
endif # plugins
LDLIBS += $(EXECENV_LDLIBS)
--
1.8.1.4
^ permalink raw reply [flat|nested] 24+ messages in thread
* [dpdk-dev] [RFC PATCH 5/6] ixgbe: rx/tx queue stop bug fix
2014-11-25 14:11 [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver Cunming Liang
` (3 preceding siblings ...)
2014-11-25 14:11 ` [dpdk-dev] [RFC PATCH 4/6] bifurc: add driver to scan bifurcated netdev Cunming Liang
@ 2014-11-25 14:11 ` Cunming Liang
2014-11-26 0:44 ` Ouyang, Changchun
2014-11-25 14:11 ` [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net device Cunming Liang
` (2 subsequent siblings)
7 siblings, 1 reply; 24+ messages in thread
From: Cunming Liang @ 2014-11-25 14:11 UTC (permalink / raw)
To: dev
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index f9b3fe3..e240376 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3805,7 +3805,7 @@ ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
do {
rte_delay_ms(1);
rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
- } while (--poll_ms && (rxdctl | IXGBE_RXDCTL_ENABLE));
+ } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
if (!poll_ms)
PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
rx_queue_id);
@@ -3906,7 +3906,7 @@ ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
rte_delay_ms(1);
txdctl = IXGBE_READ_REG(hw,
IXGBE_TXDCTL(txq->reg_idx));
- } while (--poll_ms && (txdctl | IXGBE_TXDCTL_ENABLE));
+ } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
if (!poll_ms)
PMD_INIT_LOG(ERR, "Could not disable "
"Tx Queue %d", tx_queue_id);
--
1.8.1.4
^ permalink raw reply [flat|nested] 24+ messages in thread
* [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net device
2014-11-25 14:11 [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver Cunming Liang
` (4 preceding siblings ...)
2014-11-25 14:11 ` [dpdk-dev] [RFC PATCH 5/6] ixgbe: rx/tx queue stop bug fix Cunming Liang
@ 2014-11-25 14:11 ` Cunming Liang
2014-11-25 14:34 ` Bruce Richardson
2014-11-25 14:23 ` [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver Neil Horman
2015-04-09 3:43 ` 贾学涛
7 siblings, 1 reply; 24+ messages in thread
From: Cunming Liang @ 2014-11-25 14:11 UTC (permalink / raw)
To: dev
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
lib/librte_pmd_ixgbe/Makefile | 13 +-
lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303 +++++++++++++++++++++++++++++++++
lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++++
lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 40 ++++-
lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 ++
5 files changed, 415 insertions(+), 8 deletions(-)
create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
diff --git a/lib/librte_pmd_ixgbe/Makefile b/lib/librte_pmd_ixgbe/Makefile
index 3588047..6867f17 100644
--- a/lib/librte_pmd_ixgbe/Makefile
+++ b/lib/librte_pmd_ixgbe/Makefile
@@ -37,7 +37,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_pmd_ixgbe.a
CFLAGS += -O3
-CFLAGS += $(WERROR_FLAGS)
+CFLAGS += $(WERROR_FLAGS) -Wno-cast-qual
ifeq ($(CC), icc)
#
@@ -108,10 +108,21 @@ SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_bypass.c
SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_82599_bypass.c
endif
+ifeq ($(CONFIG_RTE_LIBRTE_BIFURC),y)
+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
+SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_bifurcate.c
+endif
+endif
# this lib depends upon:
DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_eal lib/librte_ether
DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_mempool lib/librte_mbuf
DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_net lib/librte_malloc
+ifeq ($(CONFIG_RTE_LIBRTE_BIFURC),y)
+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
+DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_bifurc
+endif
+endif
+
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
new file mode 100644
index 0000000..84c445a
--- /dev/null
+++ b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
@@ -0,0 +1,303 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_dev.h>
+#include <rte_pci.h>
+#include <rte_ethdev.h>
+#include <rte_kvargs.h>
+#include <rte_bifurc.h>
+
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <errno.h>
+
+#include "ixgbe_logs.h"
+#include "ixgbe_ethdev.h"
+#include "ixgbe/ixgbe_api.h"
+
+#include <rte_mbuf.h>
+#include <rte_bifurc.h>
+#include "ixgbe_rxtx.h"
+
+/* dev_configure callback: performs no work and always returns 0. */
+static int
+ixgbe_dev_bfc_configure(struct rte_eth_dev *dev __rte_unused)
+{
+ return 0;
+}
+
+/* dev_infos_get callback: delegates to rte_bifurc_ethdev_get_info(). */
+static void
+ixgbe_dev_bfc_info(struct rte_eth_dev *dev,
+ struct rte_eth_dev_info *dev_info)
+{
+ rte_bifurc_ethdev_get_info(dev, dev_info);
+}
+
+/*
+ * stats_get callback. Empty placeholder: neither argument is read or written.
+ */
+static void
+ixgbe_dev_bfc_stats_get(__rte_unused struct rte_eth_dev *dev,
+			__rte_unused struct rte_eth_stats *igb_stats)
+{
+}
+
+/*
+ * dev_start callback: initialize the TX and RX units, then start the queues.
+ *
+ * Returns 0 on success. If RX initialization fails, the queues are cleared
+ * and -EIO is returned.
+ */
+static int
+ixgbe_dev_bfc_start(struct rte_eth_dev *dev)
+{
+	int err;
+
+	/* initialize transmission unit */
+	ixgbe_dev_tx_init(dev);
+
+	/* This can fail when allocating mbufs for descriptor rings */
+	err = ixgbe_dev_rx_init(dev);
+	if (err) {
+		PMD_INIT_LOG(ERR, "Unable to initialize RX hardware");
+		PMD_INIT_LOG(ERR, "failure in ixgbe_dev_bfc_start(): %d", err);
+		ixgbe_dev_clear_queues(dev);
+		return -EIO;
+	}
+
+	ixgbe_dev_rxtx_start(dev);
+
+	return 0;
+}
+
+/*
+ * dev_stop callback: stop every TX queue, then every RX queue, of the port.
+ * The per-queue return values are not checked.
+ */
+static void
+ixgbe_dev_bfc_stop(struct rte_eth_dev *dev)
+{
+	unsigned txq_id;
+	unsigned rxq_id;
+
+	PMD_INIT_FUNC_TRACE();
+
+	for (txq_id = 0; txq_id < dev->data->nb_tx_queues; txq_id++)
+		ixgbe_dev_tx_queue_stop(dev, txq_id);
+
+	for (rxq_id = 0; rxq_id < dev->data->nb_rx_queues; rxq_id++)
+		ixgbe_dev_rx_queue_stop(dev, rxq_id);
+}
+
+/*
+ * dev_close callback: stop all queues, then free the bifurc device that
+ * dev->pci_dev refers to.
+ */
+static void
+ixgbe_dev_bfc_close(struct rte_eth_dev *dev)
+{
+ ixgbe_dev_bfc_stop(dev);
+
+ rte_bifurc_ethdev_free(dev);
+}
+
+/*
+ * Replace dev->data->dev_link with *link using one 64-bit compare-and-set.
+ *
+ * Both link structures are accessed as a single uint64_t. Returns 0 when the
+ * compare-and-set succeeds and -1 when it does not.
+ */
+static inline int
+rte_ixgbe_dev_atomic_write_link_status(struct rte_eth_dev *dev,
+				struct rte_eth_link *link)
+{
+	uint64_t *cur = (uint64_t *)&(dev->data->dev_link);
+	uint64_t *wanted = (uint64_t *)link;
+
+	return rte_atomic64_cmpset(cur, *cur, *wanted) ? 0 : -1;
+}
+
+/*
+ * link_update callback: publish a fixed link state for the bifurcated port.
+ *
+ * No hardware is queried: the link is always reported as up, full duplex,
+ * 10G. wait_to_complete is ignored. Always returns 0.
+ */
+static int
+ixgbe_dev_bfc_link_update(struct rte_eth_dev *dev,
+			  __rte_unused int wait_to_complete)
+{
+	/*
+	 * The structure is copied as one 64-bit word by
+	 * rte_ixgbe_dev_atomic_write_link_status(), so every member must have
+	 * a defined value; members not named here are zero-initialized.
+	 */
+	struct rte_eth_link link = {
+		.link_status = 1,
+		.link_duplex = ETH_LINK_FULL_DUPLEX,
+		.link_speed = ETH_LINK_SPEED_10000,
+	};
+
+	rte_ixgbe_dev_atomic_write_link_status(dev, &link);
+
+	return 0;
+}
+
+/*
+ * rx_queue_setup callback: set up RX queue queue_idx, using the bifurc
+ * device's queue base as the offset given to ixgbe_dev_rxq_setup().
+ */
+static int
+ixgbe_dev_bfc_rx_queue_setup(struct rte_eth_dev *dev,
+			     uint16_t queue_idx,
+			     uint16_t nb_desc,
+			     unsigned int socket_id,
+			     const struct rte_eth_rxconf *rx_conf,
+			     struct rte_mempool *mp)
+{
+	/* The 32-bit queue base is narrowed to the 16-bit offset parameter. */
+	uint16_t qp_base = rte_bifurc_qp_base(dev);
+
+	return ixgbe_dev_rxq_setup(dev, queue_idx, qp_base, nb_desc,
+				   socket_id, rx_conf, mp);
+}
+
+/*
+ * tx_queue_setup callback: set up TX queue queue_idx, using the bifurc
+ * device's queue base as the offset given to ixgbe_dev_txq_setup().
+ */
+static int
+ixgbe_dev_bfc_tx_queue_setup(struct rte_eth_dev *dev,
+			     uint16_t queue_idx,
+			     uint16_t nb_desc,
+			     unsigned int socket_id,
+			     const struct rte_eth_txconf *tx_conf)
+{
+	/* The 32-bit queue base is narrowed to the 16-bit offset parameter. */
+	uint16_t qp_base = rte_bifurc_qp_base(dev);
+
+	return ixgbe_dev_txq_setup(dev, queue_idx, qp_base,
+				   nb_desc, socket_id, tx_conf);
+}
+
+/*
+ * Ethdev operations of the bifurcated ixgbe PMD.
+ *
+ * The *_bfc_* entries are defined in this file; the queue release callbacks
+ * are the regular ixgbe ones. stats_reset is explicitly left unset.
+ */
+static struct eth_dev_ops ixgbe_bifurc_ops = {
+ .dev_start = ixgbe_dev_bfc_start,
+ .dev_stop = ixgbe_dev_bfc_stop,
+ .dev_close = ixgbe_dev_bfc_close,
+ .dev_configure = ixgbe_dev_bfc_configure,
+ .dev_infos_get = ixgbe_dev_bfc_info,
+ .rx_queue_setup = ixgbe_dev_bfc_rx_queue_setup,
+ .tx_queue_setup = ixgbe_dev_bfc_tx_queue_setup,
+ .rx_queue_release = ixgbe_dev_rx_queue_release,
+ .tx_queue_release = ixgbe_dev_tx_queue_release,
+ .link_update = ixgbe_dev_bfc_link_update,
+ .stats_get = ixgbe_dev_bfc_stats_get,
+ .stats_reset = NULL,
+};
+
+/*
+ * eth_dev_init hook of the bifurcated ixgbe PMD.
+ *
+ * Installs the bifurc ops table and the default RX/TX burst functions. In a
+ * primary process it additionally fills the ixgbe_hw identity and register
+ * base from the PCI device, initializes the ixgbe shared code, allocates the
+ * MAC address table and stores the interface's hardware address in entry 0.
+ *
+ * Returns 0 on success (and always in a secondary process), -EIO if the
+ * shared code cannot be initialized, -ENOMEM if the MAC address table cannot
+ * be allocated.
+ */
+static int
+eth_ixgbe_bifurc_dev_init(struct eth_driver *eth_drv __rte_unused,
+			  struct rte_eth_dev *eth_dev)
+{
+	struct rte_pci_device *pci_dev;
+	struct ixgbe_hw *hw =
+		IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+	int diag;
+
+	PMD_INIT_FUNC_TRACE();
+
+	eth_dev->dev_ops = &ixgbe_bifurc_ops;
+	eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
+	eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+
+	/* for secondary processes, we don't initialise any further as primary
+	 * has already done this work. Only check we don't need a different
+	 * RX function */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		if (eth_dev->data->scattered_rx)
+			eth_dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
+		return 0;
+	}
+	pci_dev = eth_dev->pci_dev;
+
+	/* Vendor and Device ID need to be set before init of shared code */
+	hw->device_id = pci_dev->id.device_id;
+	hw->vendor_id = pci_dev->id.vendor_id;
+	hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
+
+#ifdef RTE_LIBRTE_IXGBE_ALLOW_UNSUPPORTED_SFP
+	hw->allow_unsupported_sfp = 1;
+#endif
+
+	/* Initialize the shared code (base driver) */
+#ifdef RTE_NIC_BYPASS
+	diag = ixgbe_bypass_init_shared_code(hw);
+#else
+	diag = ixgbe_init_shared_code(hw);
+#endif /* RTE_NIC_BYPASS */
+
+	if (diag != IXGBE_SUCCESS) {
+		PMD_INIT_LOG(ERR, "Shared code init failed: %d", diag);
+		return -EIO;
+	}
+
+	/* Allocate memory for storing MAC addresses */
+	eth_dev->data->mac_addrs = rte_zmalloc("ixgbe", ETHER_ADDR_LEN *
+			hw->mac.num_rar_entries, 0);
+	if (eth_dev->data->mac_addrs == NULL) {
+		PMD_INIT_LOG(ERR,
+			"Failed to allocate %u bytes needed to store "
+			"MAC addresses",
+			ETHER_ADDR_LEN * hw->mac.num_rar_entries);
+		return -ENOMEM;
+	}
+	/* Entry 0 receives the hardware address held by the bifurc device. */
+	rte_bifurc_mac_addr(eth_dev, &eth_dev->data->mac_addrs[0]);
+
+	return diag;
+}
+
+/*
+ * The set of PCI devices this driver supports.
+ *
+ * The entries are generated by including rte_pci_dev_ids.h with the IXGBE and
+ * IXGBEVF declaration macros defined, so that each declared ID expands to one
+ * table entry. An entry with vendor_id 0 terminates the table.
+ */
+static struct rte_pci_id pci_id_ixgbe_map[] = {
+
+#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
+#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
+#include "rte_pci_dev_ids.h"
+
+{ .vendor_id = 0, /* sentinel */ },
+};
+
+/*
+ * Ethdev driver descriptor of the bifurcated ixgbe PMD: matches the IDs in
+ * pci_id_ixgbe_map, sets the NEED_MAPPING and BIFURC driver flags, and uses
+ * eth_ixgbe_bifurc_dev_init() with an ixgbe_adapter-sized private area.
+ */
+static struct eth_driver rte_ixgbe_bifurc_pmd = {
+ {
+ .name = "rte_ixgbe_bifurc_pmd",
+ .id_table = pci_id_ixgbe_map,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING |
+ RTE_PCI_DRV_BIFURC,
+ },
+ .eth_dev_init = eth_ixgbe_bifurc_dev_init,
+ .dev_private_size = sizeof(struct ixgbe_adapter),
+};
+
+/*
+ * Driver initialization routine.
+ * Invoked once at EAL init time.
+ * Registers rte_ixgbe_bifurc_pmd as an ethdev driver. Both arguments are
+ * unused; always returns 0.
+ */
+static int
+rte_ixgbe_bifurc_pmd_init(const char *name __rte_unused,
+ const char *params __rte_unused)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ rte_eth_driver_register(&rte_ixgbe_bifurc_pmd);
+ return 0;
+}
+
+/*
+ * PMD_PDEV-type driver descriptor whose init hook is
+ * rte_ixgbe_bifurc_pmd_init(); no name is set.
+ */
+static struct rte_driver rte_ixgbe_bifurc_driver = {
+ .type = PMD_PDEV,
+ .init = rte_ixgbe_bifurc_pmd_init,
+};
+
+/* Hand the descriptor to the PMD registration macro. */
+PMD_REGISTER_DRIVER(rte_ixgbe_bifurc_driver);
diff --git a/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
new file mode 100644
index 0000000..d40b21d
--- /dev/null
+++ b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
@@ -0,0 +1,57 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IXGBE_BIFFURCATE_H_
+#define _IXGBE_BIFFURCATE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* RX queue count used as the upper bound for ring pairs below. */
+#define IXGBE_82599_MAX_RX_QUEUES 128
+
+/* NOTE(review): presumably the lowest queue index that may be split off; no user is visible here. */
+#define RTE_PMD_PACKET_RING_SPLITOFF_LOWER_LIMIT 32
+#define RTE_PMD_PACKET_MAX_RING_PAIRS IXGBE_82599_MAX_RX_QUEUES
+
+
+/**
+ * Init entry point taking a device name and its parameter string.
+ *
+ * NOTE(review): ixgbe_bifurcate.c defines no function with this name; its
+ * init hook is the static rte_ixgbe_bifurc_pmd_init(). Confirm whether this
+ * prototype is still needed.
+ */
+int rte_ixgbe_bfc_pmd_init(const char *name, const char *params);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _IXGBE_BIFFURCATE_H_ */
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index e240376..2d32907 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -100,6 +100,12 @@ rte_rxmbuf_alloc(struct rte_mempool *mp)
return (m);
}
+/*
+ * Queue index offset for this port: 0 when SR-IOV is not active, otherwise
+ * the default pool's first queue index.
+ */
+static inline uint16_t
+ixgbe_dev_queue_offset(struct rte_eth_dev *dev)
+{
+	if (RTE_ETH_DEV_SRIOV(dev).active == 0)
+		return 0;
+
+	return RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx;
+}
#if 1
#define RTE_PMD_USE_PREFETCH
@@ -1726,6 +1732,17 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
unsigned int socket_id,
const struct rte_eth_txconf *tx_conf)
{
+ uint16_t offset = ixgbe_dev_queue_offset(dev);
+ return ixgbe_dev_txq_setup(dev, queue_idx, offset,
+ nb_desc, socket_id, tx_conf);
+}
+
+int
+ixgbe_dev_txq_setup(struct rte_eth_dev *dev,
+ uint16_t queue_idx, uint16_t offset,
+ uint16_t nb_desc, unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf)
+{
const struct rte_memzone *tz;
struct igb_tx_queue *txq;
struct ixgbe_hw *hw;
@@ -1849,8 +1866,7 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->hthresh = tx_conf->tx_thresh.hthresh;
txq->wthresh = tx_conf->tx_thresh.wthresh;
txq->queue_id = queue_idx;
- txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
- queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
+ txq->reg_idx = queue_idx + offset;
txq->port_id = dev->data->port_id;
txq->txq_flags = tx_conf->txq_flags;
txq->ops = &def_txq_ops;
@@ -2083,6 +2099,18 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mp)
{
+ uint16_t offset = ixgbe_dev_queue_offset(dev);
+ return ixgbe_dev_rxq_setup(dev, queue_idx, offset, nb_desc,
+ socket_id, rx_conf, mp);
+}
+
+int
+ixgbe_dev_rxq_setup(struct rte_eth_dev *dev,
+ uint16_t queue_idx, uint16_t offset,
+ uint16_t nb_desc, unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mp)
+{
const struct rte_memzone *rz;
struct igb_rx_queue *rxq;
struct ixgbe_hw *hw;
@@ -2118,8 +2146,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
rxq->nb_rx_desc = nb_desc;
rxq->rx_free_thresh = rx_conf->rx_free_thresh;
rxq->queue_id = queue_idx;
- rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
- queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
+ rxq->reg_idx = queue_idx + offset;
rxq->port_id = dev->data->port_id;
rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
0 : ETHER_CRC_LEN);
@@ -3402,9 +3429,9 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
uint32_t fctrl;
uint32_t hlreg0;
uint32_t maxfrs;
- uint32_t srrctl;
uint32_t rdrxctl;
uint32_t rxcsum;
+ uint32_t srrctl;
uint16_t buf_size;
uint16_t i;
@@ -3684,9 +3711,9 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
struct ixgbe_hw *hw;
struct igb_tx_queue *txq;
struct igb_rx_queue *rxq;
- uint32_t txdctl;
uint32_t dmatxctl;
uint32_t rxctrl;
+ uint32_t txdctl;
uint16_t i;
PMD_INIT_FUNC_TRACE();
@@ -3731,7 +3758,6 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
if (hw->mac.type == ixgbe_mac_82599EB &&
dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
ixgbe_setup_loopback_link_82599(hw);
-
}
/*
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
index eb89715..aeffb5f 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
@@ -243,6 +243,16 @@ struct ixgbe_txq_ops {
IXGBE_ADVTXD_DCMD_DEXT |\
IXGBE_ADVTXD_DCMD_EOP)
+/*
+ * TX queue setup with an explicit queue offset: the queue's reg_idx is set
+ * to queue_idx + offset. The remaining parameters are those of the regular
+ * ixgbe TX queue setup.
+ */
+int ixgbe_dev_txq_setup(struct rte_eth_dev *dev,
+ uint16_t queue_idx, uint16_t offset,
+ uint16_t nb_desc, unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf);
+/*
+ * RX queue setup with an explicit queue offset: the queue's reg_idx is set
+ * to queue_idx + offset. The remaining parameters are those of the regular
+ * ixgbe RX queue setup.
+ */
+int ixgbe_dev_rxq_setup(struct rte_eth_dev *dev,
+ uint16_t queue_idx, uint16_t offset,
+ uint16_t nb_desc, unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mp);
+
#ifdef RTE_IXGBE_INC_VECTOR
uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
--
1.8.1.4
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net device
2014-11-25 14:11 ` [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net device Cunming Liang
@ 2014-11-25 14:34 ` Bruce Richardson
2014-11-25 14:48 ` Liang, Cunming
0 siblings, 1 reply; 24+ messages in thread
From: Bruce Richardson @ 2014-11-25 14:34 UTC (permalink / raw)
To: Cunming Liang; +Cc: dev
On Tue, Nov 25, 2014 at 10:11:22PM +0800, Cunming Liang wrote:
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> ---
> lib/librte_pmd_ixgbe/Makefile | 13 +-
> lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303 +++++++++++++++++++++++++++++++++
> lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++++
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 40 ++++-
> lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 ++
> 5 files changed, 415 insertions(+), 8 deletions(-)
> create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
>
These changes are the ones that I'm not too sure about. I'd prefer if all
material for the bifurcated driver be kept within the librte_pmd_bifurc directory.
Is it possible to leave ixgbe largely unmodified and simply have the new
bifurcated driver pull in the needed ixgbe (and later i40e) functions at
compile time i.e. refer from one Makefile to the sources in the other
driver's directory?
My thinking is that the bifurcated driver is so significantly different in
the way it works, and in the limits on its functionality (e.g. no direct filter
support or queue management), that it's best kept completely separate, only
"borrowing" the needed descriptor read/write functions from the other drivers as
needed.
Just my 2c. I'm curious as to what others think.
/Bruce
> diff --git a/lib/librte_pmd_ixgbe/Makefile b/lib/librte_pmd_ixgbe/Makefile
> index 3588047..6867f17 100644
> --- a/lib/librte_pmd_ixgbe/Makefile
> +++ b/lib/librte_pmd_ixgbe/Makefile
> @@ -37,7 +37,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
> LIB = librte_pmd_ixgbe.a
>
> CFLAGS += -O3
> -CFLAGS += $(WERROR_FLAGS)
> +CFLAGS += $(WERROR_FLAGS) -Wno-cast-qual
>
> ifeq ($(CC), icc)
> #
> @@ -108,10 +108,21 @@ SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_bypass.c
> SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_82599_bypass.c
> endif
>
> +ifeq ($(CONFIG_RTE_LIBRTE_BIFURC),y)
> +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
> +SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_bifurcate.c
> +endif
> +endif
>
> # this lib depends upon:
> DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_eal lib/librte_ether
> DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_mempool lib/librte_mbuf
> DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_net lib/librte_malloc
> +ifeq ($(CONFIG_RTE_LIBRTE_BIFURC),y)
> +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_bifurc
> +endif
> +endif
> +
>
> include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> new file mode 100644
> index 0000000..84c445a
> --- /dev/null
> +++ b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> @@ -0,0 +1,303 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <rte_eal.h>
> +#include <rte_malloc.h>
> +#include <rte_memzone.h>
> +#include <rte_dev.h>
> +#include <rte_pci.h>
> +#include <rte_ethdev.h>
> +#include <rte_kvargs.h>
> +#include <rte_bifurc.h>
> +
> +#include <linux/if_ether.h>
> +#include <linux/if_packet.h>
> +#include <arpa/inet.h>
> +#include <net/if.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/ioctl.h>
> +#include <sys/mman.h>
> +#include <unistd.h>
> +#include <poll.h>
> +#include <errno.h>
> +
> +#include "ixgbe_logs.h"
> +#include "ixgbe_ethdev.h"
> +#include "ixgbe/ixgbe_api.h"
> +
> +#include <rte_mbuf.h>
> +#include <rte_bifurc.h>
> +#include "ixgbe_rxtx.h"
> +
> +static int
> +ixgbe_dev_bfc_configure(struct rte_eth_dev *dev __rte_unused)
> +{
> + return 0;
> +}
> +
> +static void
> +ixgbe_dev_bfc_info(struct rte_eth_dev *dev,
> + struct rte_eth_dev_info *dev_info)
> +{
> + rte_bifurc_ethdev_get_info(dev, dev_info);
> +}
> +
> +static void
> +ixgbe_dev_bfc_stats_get(__rte_unused struct rte_eth_dev *dev,
> + __rte_unused struct rte_eth_stats *igb_stats)
> +{
> + return;
> +}
> +
> +static int
> +ixgbe_dev_bfc_start(struct rte_eth_dev *dev)
> +{
> + int err;
> +
> + /* initialize transmission unit */
> + ixgbe_dev_tx_init(dev);
> +
> + /* This can fail when allocating mbufs for descriptor rings */
> + err = ixgbe_dev_rx_init(dev);
> + if (err) {
> + PMD_INIT_LOG(ERR, "Unable to initialize RX hardware\n");
> + goto error;
> + }
> +
> + ixgbe_dev_rxtx_start(dev);
> +
> + return 0;
> +
> +error:
> + PMD_INIT_LOG(ERR, "failure in ixgbe_dev_start(): %d", err);
> + ixgbe_dev_clear_queues(dev);
> + return -EIO;
> +}
> +
> +static void
> +ixgbe_dev_bfc_stop(struct rte_eth_dev *dev)
> +{
> + unsigned i;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + for (i = 0; i < dev->data->nb_tx_queues; i++)
> + ixgbe_dev_tx_queue_stop(dev, i);
> +
> + for (i = 0; i < dev->data->nb_rx_queues; i++)
> + ixgbe_dev_rx_queue_stop(dev, i);
> +}
> +
> +static void
> +ixgbe_dev_bfc_close(struct rte_eth_dev *dev)
> +{
> + ixgbe_dev_bfc_stop(dev);
> +
> + rte_bifurc_ethdev_free(dev);
> +}
> +
> +static inline int
> +rte_ixgbe_dev_atomic_write_link_status(struct rte_eth_dev *dev,
> + struct rte_eth_link *link)
> +{
> + struct rte_eth_link *dst = &(dev->data->dev_link);
> + struct rte_eth_link *src = link;
> +
> + if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
> + *(uint64_t *)src) == 0)
> + return -1;
> +
> + return 0;
> +}
> +
> +static int
> +ixgbe_dev_bfc_link_update(__rte_unused struct rte_eth_dev *dev,
> + __rte_unused int wait_to_complete)
> +{
> + struct rte_eth_link link;
> +
> + link.link_status = 1;
> + link.link_duplex = ETH_LINK_FULL_DUPLEX;
> + link.link_speed = ETH_LINK_SPEED_10000;
> +
> + rte_ixgbe_dev_atomic_write_link_status(dev, &link);
> +
> + return 0;
> +}
> +
> +static int
> +ixgbe_dev_bfc_rx_queue_setup(struct rte_eth_dev *dev,
> + uint16_t queue_idx,
> + uint16_t nb_desc,
> + unsigned int socket_id,
> + const struct rte_eth_rxconf *rx_conf,
> + struct rte_mempool *mp)
> +{
> + uint16_t offset = rte_bifurc_qp_base(dev);
> + return ixgbe_dev_rxq_setup(dev, queue_idx, offset, nb_desc,
> + socket_id, rx_conf, mp);
> +}
> +
> +static int
> +ixgbe_dev_bfc_tx_queue_setup(struct rte_eth_dev *dev,
> + uint16_t queue_idx,
> + uint16_t nb_desc,
> + unsigned int socket_id,
> + const struct rte_eth_txconf *tx_conf)
> +{
> + uint16_t offset = rte_bifurc_qp_base(dev);
> + return ixgbe_dev_txq_setup(dev, queue_idx, offset,
> + nb_desc, socket_id, tx_conf);
> +}
> +
> +static struct eth_dev_ops ixgbe_bifurc_ops = {
> + .dev_start = ixgbe_dev_bfc_start,
> + .dev_stop = ixgbe_dev_bfc_stop,
> + .dev_close = ixgbe_dev_bfc_close,
> + .dev_configure = ixgbe_dev_bfc_configure,
> + .dev_infos_get = ixgbe_dev_bfc_info,
> + .rx_queue_setup = ixgbe_dev_bfc_rx_queue_setup,
> + .tx_queue_setup = ixgbe_dev_bfc_tx_queue_setup,
> + .rx_queue_release = ixgbe_dev_rx_queue_release,
> + .tx_queue_release = ixgbe_dev_tx_queue_release,
> + .link_update = ixgbe_dev_bfc_link_update,
> + .stats_get = ixgbe_dev_bfc_stats_get,
> + .stats_reset = NULL,
> +};
> +
> +static int
> +eth_ixgbe_bifurc_dev_init(struct eth_driver *eth_drv __rte_unused,
> + struct rte_eth_dev *eth_dev)
> +{
> + struct rte_pci_device *pci_dev;
> + struct ixgbe_hw *hw =
> + IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
> + int diag;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + eth_dev->dev_ops = &ixgbe_bifurc_ops;
> + eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
> + eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
> +
> + /* for secondary processes, we don't initialise any further as primary
> + * has already done this work. Only check we don't need a different
> + * RX function */
> + if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> + if (eth_dev->data->scattered_rx)
> + eth_dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
> + return 0;
> + }
> + pci_dev = eth_dev->pci_dev;
> +
> + /* Vendor and Device ID need to be set before init of shared code */
> + hw->device_id = pci_dev->id.device_id;
> + hw->vendor_id = pci_dev->id.vendor_id;
> + hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
> +
> +#ifdef RTE_LIBRTE_IXGBE_ALLOW_UNSUPPORTED_SFP
> + hw->allow_unsupported_sfp = 1;
> +#endif
> +
> + /* Initialize the shared code (base driver) */
> +#ifdef RTE_NIC_BYPASS
> + diag = ixgbe_bypass_init_shared_code(hw);
> +#else
> + diag = ixgbe_init_shared_code(hw);
> +#endif /* RTE_NIC_BYPASS */
> +
> + if (diag != IXGBE_SUCCESS) {
> + PMD_INIT_LOG(ERR, "Shared code init failed: %d", diag);
> + return -EIO;
> + }
> +
> + /* Allocate memory for storing MAC addresses */
> + eth_dev->data->mac_addrs = rte_zmalloc("ixgbe", ETHER_ADDR_LEN *
> + hw->mac.num_rar_entries, 0);
> + if (eth_dev->data->mac_addrs == NULL) {
> + PMD_INIT_LOG(ERR,
> + "Failed to allocate %u bytes needed to store "
> + "MAC addresses",
> + ETHER_ADDR_LEN * hw->mac.num_rar_entries);
> + return -ENOMEM;
> + }
> + rte_bifurc_mac_addr(eth_dev, &eth_dev->data->mac_addrs[0]);
> +
> + return diag;
> +}
> +
> +/*
> + * The set of PCI devices this driver supports
> + */
> +static struct rte_pci_id pci_id_ixgbe_map[] = {
> +
> +#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
> +#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
> +#include "rte_pci_dev_ids.h"
> +
> +{ .vendor_id = 0, /* sentinel */ },
> +};
> +
> +static struct eth_driver rte_ixgbe_bifurc_pmd = {
> + {
> + .name = "rte_ixgbe_bifurc_pmd",
> + .id_table = pci_id_ixgbe_map,
> + .drv_flags = RTE_PCI_DRV_NEED_MAPPING |
> + RTE_PCI_DRV_BIFURC,
> + },
> + .eth_dev_init = eth_ixgbe_bifurc_dev_init,
> + .dev_private_size = sizeof(struct ixgbe_adapter),
> +};
> +
> +/*
> + * Driver initialization routine.
> + * Invoked once at EAL init time.
> + * Register itself as the [Poll Mode] Driver of PCI IXGBE devices.
> + */
> +static int
> +rte_ixgbe_bifurc_pmd_init(const char *name __rte_unused,
> + const char *params __rte_unused)
> +{
> + PMD_INIT_FUNC_TRACE();
> +
> + rte_eth_driver_register(&rte_ixgbe_bifurc_pmd);
> + return 0;
> +}
> +
> +static struct rte_driver rte_ixgbe_bifurc_driver = {
> + .type = PMD_PDEV,
> + .init = rte_ixgbe_bifurc_pmd_init,
> +};
> +
> +PMD_REGISTER_DRIVER(rte_ixgbe_bifurc_driver);
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> new file mode 100644
> index 0000000..d40b21d
> --- /dev/null
> +++ b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> @@ -0,0 +1,57 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _IXGBE_BIFFURCATE_H_
> +#define _IXGBE_BIFFURCATE_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#define IXGBE_82599_MAX_RX_QUEUES 128
> +
> +#define RTE_PMD_PACKET_RING_SPLITOFF_LOWER_LIMIT 32
> +#define RTE_PMD_PACKET_MAX_RING_PAIRS IXGBE_82599_MAX_RX_QUEUES
> +
> +
> +/**
> + * For use by the EAL only. Called as part of EAL init to set up any dummy NICs
> + * configured on command line.
> + */
> +int rte_ixgbe_bfc_pmd_init(const char *name, const char *params);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> index e240376..2d32907 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -100,6 +100,12 @@ rte_rxmbuf_alloc(struct rte_mempool *mp)
> return (m);
> }
>
> +static inline uint16_t
> +ixgbe_dev_queue_offset(struct rte_eth_dev *dev)
> +{
> + return (RTE_ETH_DEV_SRIOV(dev).active == 0) ?
> + 0 : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx;
> +}
>
> #if 1
> #define RTE_PMD_USE_PREFETCH
> @@ -1726,6 +1732,17 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
> unsigned int socket_id,
> const struct rte_eth_txconf *tx_conf)
> {
> + uint16_t offset = ixgbe_dev_queue_offset(dev);
> + return ixgbe_dev_txq_setup(dev, queue_idx, offset,
> + nb_desc, socket_id, tx_conf);
> +}
> +
> +int
> +ixgbe_dev_txq_setup(struct rte_eth_dev *dev,
> + uint16_t queue_idx, uint16_t offset,
> + uint16_t nb_desc, unsigned int socket_id,
> + const struct rte_eth_txconf *tx_conf)
> +{
> const struct rte_memzone *tz;
> struct igb_tx_queue *txq;
> struct ixgbe_hw *hw;
> @@ -1849,8 +1866,7 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
> txq->hthresh = tx_conf->tx_thresh.hthresh;
> txq->wthresh = tx_conf->tx_thresh.wthresh;
> txq->queue_id = queue_idx;
> - txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
> - queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
> + txq->reg_idx = queue_idx + offset;
> txq->port_id = dev->data->port_id;
> txq->txq_flags = tx_conf->txq_flags;
> txq->ops = &def_txq_ops;
> @@ -2083,6 +2099,18 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
> const struct rte_eth_rxconf *rx_conf,
> struct rte_mempool *mp)
> {
> + uint16_t offset = ixgbe_dev_queue_offset(dev);
> + return ixgbe_dev_rxq_setup(dev, queue_idx, offset, nb_desc,
> + socket_id, rx_conf, mp);
> +}
> +
> +int
> +ixgbe_dev_rxq_setup(struct rte_eth_dev *dev,
> + uint16_t queue_idx, uint16_t offset,
> + uint16_t nb_desc, unsigned int socket_id,
> + const struct rte_eth_rxconf *rx_conf,
> + struct rte_mempool *mp)
> +{
> const struct rte_memzone *rz;
> struct igb_rx_queue *rxq;
> struct ixgbe_hw *hw;
> @@ -2118,8 +2146,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
> rxq->nb_rx_desc = nb_desc;
> rxq->rx_free_thresh = rx_conf->rx_free_thresh;
> rxq->queue_id = queue_idx;
> - rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
> - queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
> + rxq->reg_idx = queue_idx + offset;
> rxq->port_id = dev->data->port_id;
> rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
> 0 : ETHER_CRC_LEN);
> @@ -3402,9 +3429,9 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
> uint32_t fctrl;
> uint32_t hlreg0;
> uint32_t maxfrs;
> - uint32_t srrctl;
> uint32_t rdrxctl;
> uint32_t rxcsum;
> + uint32_t srrctl;
> uint16_t buf_size;
> uint16_t i;
>
> @@ -3684,9 +3711,9 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
> struct ixgbe_hw *hw;
> struct igb_tx_queue *txq;
> struct igb_rx_queue *rxq;
> - uint32_t txdctl;
> uint32_t dmatxctl;
> uint32_t rxctrl;
> + uint32_t txdctl;
> uint16_t i;
>
> PMD_INIT_FUNC_TRACE();
> @@ -3731,7 +3758,6 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
> if (hw->mac.type == ixgbe_mac_82599EB &&
> dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
> ixgbe_setup_loopback_link_82599(hw);
> -
> }
>
> /*
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> index eb89715..aeffb5f 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> @@ -243,6 +243,16 @@ struct ixgbe_txq_ops {
> IXGBE_ADVTXD_DCMD_DEXT |\
> IXGBE_ADVTXD_DCMD_EOP)
>
> +int ixgbe_dev_txq_setup(struct rte_eth_dev *dev,
> + uint16_t queue_idx, uint16_t offset,
> + uint16_t nb_desc, unsigned int socket_id,
> + const struct rte_eth_txconf *tx_conf);
> +int ixgbe_dev_rxq_setup(struct rte_eth_dev *dev,
> + uint16_t queue_idx, uint16_t offset,
> + uint16_t nb_desc, unsigned int socket_id,
> + const struct rte_eth_rxconf *rx_conf,
> + struct rte_mempool *mp);
> +
> #ifdef RTE_IXGBE_INC_VECTOR
> uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> uint16_t nb_pkts);
> --
> 1.8.1.4
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net device
2014-11-25 14:34 ` Bruce Richardson
@ 2014-11-25 14:48 ` Liang, Cunming
2014-11-25 15:01 ` Bruce Richardson
0 siblings, 1 reply; 24+ messages in thread
From: Liang, Cunming @ 2014-11-25 14:48 UTC (permalink / raw)
To: Richardson, Bruce; +Cc: dev
> -----Original Message-----
> From: Richardson, Bruce
> Sent: Tuesday, November 25, 2014 10:34 PM
> To: Liang, Cunming
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net device
>
> On Tue, Nov 25, 2014 at 10:11:22PM +0800, Cunming Liang wrote:
> > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > ---
> > lib/librte_pmd_ixgbe/Makefile | 13 +-
> > lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303
> +++++++++++++++++++++++++++++++++
> > lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++++
> > lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 40 ++++-
> > lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 ++
> > 5 files changed, 415 insertions(+), 8 deletions(-)
> > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> >
>
> These changes are the ones that I'm not too sure about. I'd prefer if all
> material for the bifurcated driver be kept within the librte_pmd_bifurc directory.
[Liang, Cunming] I don't have a librte_pmd_bifurc library.
So far, the purpose of librte_bifurc is device scan only; it is not used as a PMD.
During driver probe, depending on the device id, it asks for the correct PMD from 'librte_pmd_ixgbe, librte_pmd_i40e'.
> Is it possible to leave ixgbe largely unmodified and simply have the new
> bifurcated driver pull in the needed ixgbe (and later i40e) functions at
> compile time i.e. refer from one Makefile to the sources in the other
> driver's directory?
[Liang, Cunming] Nice point. Suppose we have a single directory gathering all direct ring access code,
e.g. one called "librte_pmd_bifurc"; inside it, we'll have bifurc_ixgbe, bifurc_i40e, ...
Each of them still depends on other libraries like librte_pmd_ixgbe/librte_pmd_i40e.
We may remove the internal dependency inside one PMD driver, but we add more dependencies between libraries.
> My thinking is that the bifurcated driver is so significantly different in
> the way it works, and the limits on its functionality e.g. no direct filter
> support or queue management, that it's best kept completely separate and only
> "borrow" the needed descriptor read/write functions from the other drivers as is
> needed.
>
> Just my 2c. I'm curious as to what others think.
>
> /Bruce
>
> > diff --git a/lib/librte_pmd_ixgbe/Makefile b/lib/librte_pmd_ixgbe/Makefile
> > index 3588047..6867f17 100644
> > --- a/lib/librte_pmd_ixgbe/Makefile
> > +++ b/lib/librte_pmd_ixgbe/Makefile
> > @@ -37,7 +37,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
> > LIB = librte_pmd_ixgbe.a
> >
> > CFLAGS += -O3
> > -CFLAGS += $(WERROR_FLAGS)
> > +CFLAGS += $(WERROR_FLAGS) -Wno-cast-qual
> >
> > ifeq ($(CC), icc)
> > #
> > @@ -108,10 +108,21 @@ SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) +=
> ixgbe_bypass.c
> > SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_82599_bypass.c
> > endif
> >
> > +ifeq ($(CONFIG_RTE_LIBRTE_BIFURC),y)
> > +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
> > +SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_bifurcate.c
> > +endif
> > +endif
> >
> > # this lib depends upon:
> > DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_eal
> lib/librte_ether
> > DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_mempool
> lib/librte_mbuf
> > DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_net
> lib/librte_malloc
> > +ifeq ($(CONFIG_RTE_LIBRTE_BIFURC),y)
> > +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
> > +DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_bifurc
> > +endif
> > +endif
> > +
> >
> > include $(RTE_SDK)/mk/rte.lib.mk
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > new file mode 100644
> > index 0000000..84c445a
> > --- /dev/null
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > @@ -0,0 +1,303 @@
> > +/*-
> > + * BSD LICENSE
> > + *
> > + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> > + * All rights reserved.
> > + *
> > + * Redistribution and use in source and binary forms, with or without
> > + * modification, are permitted provided that the following conditions
> > + * are met:
> > + *
> > + * * Redistributions of source code must retain the above copyright
> > + * notice, this list of conditions and the following disclaimer.
> > + * * Redistributions in binary form must reproduce the above copyright
> > + * notice, this list of conditions and the following disclaimer in
> > + * the documentation and/or other materials provided with the
> > + * distribution.
> > + * * Neither the name of Intel Corporation nor the names of its
> > + * contributors may be used to endorse or promote products derived
> > + * from this software without specific prior written permission.
> > + *
> > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> ON ANY
> > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> > + */
> > +
> > +#include <rte_eal.h>
> > +#include <rte_malloc.h>
> > +#include <rte_memzone.h>
> > +#include <rte_dev.h>
> > +#include <rte_pci.h>
> > +#include <rte_ethdev.h>
> > +#include <rte_kvargs.h>
> > +#include <rte_bifurc.h>
> > +
> > +#include <linux/if_ether.h>
> > +#include <linux/if_packet.h>
> > +#include <arpa/inet.h>
> > +#include <net/if.h>
> > +#include <sys/types.h>
> > +#include <sys/socket.h>
> > +#include <sys/ioctl.h>
> > +#include <sys/mman.h>
> > +#include <unistd.h>
> > +#include <poll.h>
> > +#include <errno.h>
> > +
> > +#include "ixgbe_logs.h"
> > +#include "ixgbe_ethdev.h"
> > +#include "ixgbe/ixgbe_api.h"
> > +
> > +#include <rte_mbuf.h>
> > +#include <rte_bifurc.h>
> > +#include "ixgbe_rxtx.h"
> > +
> > +static int
> > +ixgbe_dev_bfc_configure(struct rte_eth_dev *dev __rte_unused)
> > +{
> > + return 0;
> > +}
> > +
> > +static void
> > +ixgbe_dev_bfc_info(struct rte_eth_dev *dev,
> > + struct rte_eth_dev_info *dev_info)
> > +{
> > + rte_bifurc_ethdev_get_info(dev, dev_info);
> > +}
> > +
> > +static void
> > +ixgbe_dev_bfc_stats_get(__rte_unused struct rte_eth_dev *dev,
> > + __rte_unused struct rte_eth_stats *igb_stats)
> > +{
> > + return;
> > +}
> > +
> > +static int
> > +ixgbe_dev_bfc_start(struct rte_eth_dev *dev)
> > +{
> > + int err;
> > +
> > + /* initialize transmission unit */
> > + ixgbe_dev_tx_init(dev);
> > +
> > + /* This can fail when allocating mbufs for descriptor rings */
> > + err = ixgbe_dev_rx_init(dev);
> > + if (err) {
> > + PMD_INIT_LOG(ERR, "Unable to initialize RX hardware\n");
> > + goto error;
> > + }
> > +
> > + ixgbe_dev_rxtx_start(dev);
> > +
> > + return 0;
> > +
> > +error:
> > + PMD_INIT_LOG(ERR, "failure in ixgbe_dev_start(): %d", err);
> > + ixgbe_dev_clear_queues(dev);
> > + return -EIO;
> > +}
> > +
> > +static void
> > +ixgbe_dev_bfc_stop(struct rte_eth_dev *dev)
> > +{
> > + unsigned i;
> > +
> > + PMD_INIT_FUNC_TRACE();
> > +
> > + for (i = 0; i < dev->data->nb_tx_queues; i++)
> > + ixgbe_dev_tx_queue_stop(dev, i);
> > +
> > + for (i = 0; i < dev->data->nb_rx_queues; i++)
> > + ixgbe_dev_rx_queue_stop(dev, i);
> > +}
> > +
> > +static void
> > +ixgbe_dev_bfc_close(struct rte_eth_dev *dev)
> > +{
> > + ixgbe_dev_bfc_stop(dev);
> > +
> > + rte_bifurc_ethdev_free(dev);
> > +}
> > +
> > +static inline int
> > +rte_ixgbe_dev_atomic_write_link_status(struct rte_eth_dev *dev,
> > + struct rte_eth_link *link)
> > +{
> > + struct rte_eth_link *dst = &(dev->data->dev_link);
> > + struct rte_eth_link *src = link;
> > +
> > + if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
> > + *(uint64_t *)src) == 0)
> > + return -1;
> > +
> > + return 0;
> > +}
> > +
> > +static int
> > +ixgbe_dev_bfc_link_update(__rte_unused struct rte_eth_dev *dev,
> > + __rte_unused int wait_to_complete)
> > +{
> > + struct rte_eth_link link;
> > +
> > + link.link_status = 1;
> > + link.link_duplex = ETH_LINK_FULL_DUPLEX;
> > + link.link_speed = ETH_LINK_SPEED_10000;
> > +
> > + rte_ixgbe_dev_atomic_write_link_status(dev, &link);
> > +
> > + return 0;
> > +}
> > +
> > +static int
> > +ixgbe_dev_bfc_rx_queue_setup(struct rte_eth_dev *dev,
> > + uint16_t queue_idx,
> > + uint16_t nb_desc,
> > + unsigned int socket_id,
> > + const struct rte_eth_rxconf *rx_conf,
> > + struct rte_mempool *mp)
> > +{
> > + uint16_t offset = rte_bifurc_qp_base(dev);
> > + return ixgbe_dev_rxq_setup(dev, queue_idx, offset, nb_desc,
> > + socket_id, rx_conf, mp);
> > +}
> > +
> > +static int
> > +ixgbe_dev_bfc_tx_queue_setup(struct rte_eth_dev *dev,
> > + uint16_t queue_idx,
> > + uint16_t nb_desc,
> > + unsigned int socket_id,
> > + const struct rte_eth_txconf *tx_conf)
> > +{
> > + uint16_t offset = rte_bifurc_qp_base(dev);
> > + return ixgbe_dev_txq_setup(dev, queue_idx, offset,
> > + nb_desc, socket_id, tx_conf);
> > +}
> > +
> > +static struct eth_dev_ops ixgbe_bifurc_ops = {
> > + .dev_start = ixgbe_dev_bfc_start,
> > + .dev_stop = ixgbe_dev_bfc_stop,
> > + .dev_close = ixgbe_dev_bfc_close,
> > + .dev_configure = ixgbe_dev_bfc_configure,
> > + .dev_infos_get = ixgbe_dev_bfc_info,
> > + .rx_queue_setup = ixgbe_dev_bfc_rx_queue_setup,
> > + .tx_queue_setup = ixgbe_dev_bfc_tx_queue_setup,
> > + .rx_queue_release = ixgbe_dev_rx_queue_release,
> > + .tx_queue_release = ixgbe_dev_tx_queue_release,
> > + .link_update = ixgbe_dev_bfc_link_update,
> > + .stats_get = ixgbe_dev_bfc_stats_get,
> > + .stats_reset = NULL,
> > +};
> > +
> > +static int
> > +eth_ixgbe_bifurc_dev_init(struct eth_driver *eth_drv __rte_unused,
> > + struct rte_eth_dev *eth_dev)
> > +{
> > + struct rte_pci_device *pci_dev;
> > + struct ixgbe_hw *hw =
> > + IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
> > + int diag;
> > +
> > + PMD_INIT_FUNC_TRACE();
> > +
> > + eth_dev->dev_ops = &ixgbe_bifurc_ops;
> > + eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
> > + eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
> > +
> > + /* for secondary processes, we don't initialise any further as primary
> > + * has already done this work. Only check we don't need a different
> > + * RX function */
> > + if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> > + if (eth_dev->data->scattered_rx)
> > + eth_dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
> > + return 0;
> > + }
> > + pci_dev = eth_dev->pci_dev;
> > +
> > + /* Vendor and Device ID need to be set before init of shared code */
> > + hw->device_id = pci_dev->id.device_id;
> > + hw->vendor_id = pci_dev->id.vendor_id;
> > + hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
> > +
> > +#ifdef RTE_LIBRTE_IXGBE_ALLOW_UNSUPPORTED_SFP
> > + hw->allow_unsupported_sfp = 1;
> > +#endif
> > +
> > + /* Initialize the shared code (base driver) */
> > +#ifdef RTE_NIC_BYPASS
> > + diag = ixgbe_bypass_init_shared_code(hw);
> > +#else
> > + diag = ixgbe_init_shared_code(hw);
> > +#endif /* RTE_NIC_BYPASS */
> > +
> > + if (diag != IXGBE_SUCCESS) {
> > + PMD_INIT_LOG(ERR, "Shared code init failed: %d", diag);
> > + return -EIO;
> > + }
> > +
> > + /* Allocate memory for storing MAC addresses */
> > + eth_dev->data->mac_addrs = rte_zmalloc("ixgbe", ETHER_ADDR_LEN *
> > + hw->mac.num_rar_entries, 0);
> > + if (eth_dev->data->mac_addrs == NULL) {
> > + PMD_INIT_LOG(ERR,
> > + "Failed to allocate %u bytes needed to store "
> > + "MAC addresses",
> > + ETHER_ADDR_LEN * hw->mac.num_rar_entries);
> > + return -ENOMEM;
> > + }
> > + rte_bifurc_mac_addr(eth_dev, &eth_dev->data->mac_addrs[0]);
> > +
> > + return diag;
> > +}
> > +
> > +/*
> > + * The set of PCI devices this driver supports
> > + */
> > +static struct rte_pci_id pci_id_ixgbe_map[] = {
> > +
> > +#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) {RTE_PCI_DEVICE(vend,
> dev)},
> > +#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev) {RTE_PCI_DEVICE(vend,
> dev)},
> > +#include "rte_pci_dev_ids.h"
> > +
> > +{ .vendor_id = 0, /* sentinel */ },
> > +};
> > +
> > +static struct eth_driver rte_ixgbe_bifurc_pmd = {
> > + {
> > + .name = "rte_ixgbe_bifurc_pmd",
> > + .id_table = pci_id_ixgbe_map,
> > + .drv_flags = RTE_PCI_DRV_NEED_MAPPING |
> > + RTE_PCI_DRV_BIFURC,
> > + },
> > + .eth_dev_init = eth_ixgbe_bifurc_dev_init,
> > + .dev_private_size = sizeof(struct ixgbe_adapter),
> > +};
> > +
> > +/*
> > + * Driver initialization routine.
> > + * Invoked once at EAL init time.
> > + * Register itself as the [Poll Mode] Driver of PCI IXGBE devices.
> > + */
> > +static int
> > +rte_ixgbe_bifurc_pmd_init(const char *name __rte_unused,
> > + const char *params __rte_unused)
> > +{
> > + PMD_INIT_FUNC_TRACE();
> > +
> > + rte_eth_driver_register(&rte_ixgbe_bifurc_pmd);
> > + return 0;
> > +}
> > +
> > +static struct rte_driver rte_ixgbe_bifurc_driver = {
> > + .type = PMD_PDEV,
> > + .init = rte_ixgbe_bifurc_pmd_init,
> > +};
> > +
> > +PMD_REGISTER_DRIVER(rte_ixgbe_bifurc_driver);
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > new file mode 100644
> > index 0000000..d40b21d
> > --- /dev/null
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > @@ -0,0 +1,57 @@
> > +/*-
> > + * BSD LICENSE
> > + *
> > + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> > + * All rights reserved.
> > + *
> > + * Redistribution and use in source and binary forms, with or without
> > + * modification, are permitted provided that the following conditions
> > + * are met:
> > + *
> > + * * Redistributions of source code must retain the above copyright
> > + * notice, this list of conditions and the following disclaimer.
> > + * * Redistributions in binary form must reproduce the above copyright
> > + * notice, this list of conditions and the following disclaimer in
> > + * the documentation and/or other materials provided with the
> > + * distribution.
> > + * * Neither the name of Intel Corporation nor the names of its
> > + * contributors may be used to endorse or promote products derived
> > + * from this software without specific prior written permission.
> > + *
> > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> ON ANY
> > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> > + */
> > +
> > +#ifndef _IXGBE_BIFFURCATE_H_
> > +#define _IXGBE_BIFFURCATE_H_
> > +
> > +#ifdef __cplusplus
> > +extern "C" {
> > +#endif
> > +
> > +#define IXGBE_82599_MAX_RX_QUEUES 128
> > +
> > +#define RTE_PMD_PACKET_RING_SPLITOFF_LOWER_LIMIT 32
> > +#define RTE_PMD_PACKET_MAX_RING_PAIRS
> IXGBE_82599_MAX_RX_QUEUES
> > +
> > +
> > +/**
> > + * For use by the EAL only. Called as part of EAL init to set up any dummy NICs
> > + * configured on command line.
> > + */
> > +int rte_ixgbe_bfc_pmd_init(const char *name, const char *params);
> > +
> > +#ifdef __cplusplus
> > +}
> > +#endif
> > +
> > +#endif
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > index e240376..2d32907 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > @@ -100,6 +100,12 @@ rte_rxmbuf_alloc(struct rte_mempool *mp)
> > return (m);
> > }
> >
> > +static inline uint16_t
> > +ixgbe_dev_queue_offset(struct rte_eth_dev *dev)
> > +{
> > + return (RTE_ETH_DEV_SRIOV(dev).active == 0) ?
> > + 0 : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx;
> > +}
> >
> > #if 1
> > #define RTE_PMD_USE_PREFETCH
> > @@ -1726,6 +1732,17 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev
> *dev,
> > unsigned int socket_id,
> > const struct rte_eth_txconf *tx_conf)
> > {
> > + uint16_t offset = ixgbe_dev_queue_offset(dev);
> > + return ixgbe_dev_txq_setup(dev, queue_idx, offset,
> > + nb_desc, socket_id, tx_conf);
> > +}
> > +
> > +int
> > +ixgbe_dev_txq_setup(struct rte_eth_dev *dev,
> > + uint16_t queue_idx, uint16_t offset,
> > + uint16_t nb_desc, unsigned int socket_id,
> > + const struct rte_eth_txconf *tx_conf)
> > +{
> > const struct rte_memzone *tz;
> > struct igb_tx_queue *txq;
> > struct ixgbe_hw *hw;
> > @@ -1849,8 +1866,7 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev
> *dev,
> > txq->hthresh = tx_conf->tx_thresh.hthresh;
> > txq->wthresh = tx_conf->tx_thresh.wthresh;
> > txq->queue_id = queue_idx;
> > - txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
> > - queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
> > + txq->reg_idx = queue_idx + offset;
> > txq->port_id = dev->data->port_id;
> > txq->txq_flags = tx_conf->txq_flags;
> > txq->ops = &def_txq_ops;
> > @@ -2083,6 +2099,18 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev
> *dev,
> > const struct rte_eth_rxconf *rx_conf,
> > struct rte_mempool *mp)
> > {
> > + uint16_t offset = ixgbe_dev_queue_offset(dev);
> > + return ixgbe_dev_rxq_setup(dev, queue_idx, offset, nb_desc,
> > + socket_id, rx_conf, mp);
> > +}
> > +
> > +int
> > +ixgbe_dev_rxq_setup(struct rte_eth_dev *dev,
> > + uint16_t queue_idx, uint16_t offset,
> > + uint16_t nb_desc, unsigned int socket_id,
> > + const struct rte_eth_rxconf *rx_conf,
> > + struct rte_mempool *mp)
> > +{
> > const struct rte_memzone *rz;
> > struct igb_rx_queue *rxq;
> > struct ixgbe_hw *hw;
> > @@ -2118,8 +2146,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev
> *dev,
> > rxq->nb_rx_desc = nb_desc;
> > rxq->rx_free_thresh = rx_conf->rx_free_thresh;
> > rxq->queue_id = queue_idx;
> > - rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
> > - queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
> > + rxq->reg_idx = queue_idx + offset;
> > rxq->port_id = dev->data->port_id;
> > rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
> > 0 : ETHER_CRC_LEN);
> > @@ -3402,9 +3429,9 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
> > uint32_t fctrl;
> > uint32_t hlreg0;
> > uint32_t maxfrs;
> > - uint32_t srrctl;
> > uint32_t rdrxctl;
> > uint32_t rxcsum;
> > + uint32_t srrctl;
> > uint16_t buf_size;
> > uint16_t i;
> >
> > @@ -3684,9 +3711,9 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
> > struct ixgbe_hw *hw;
> > struct igb_tx_queue *txq;
> > struct igb_rx_queue *rxq;
> > - uint32_t txdctl;
> > uint32_t dmatxctl;
> > uint32_t rxctrl;
> > + uint32_t txdctl;
> > uint16_t i;
> >
> > PMD_INIT_FUNC_TRACE();
> > @@ -3731,7 +3758,6 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
> > if (hw->mac.type == ixgbe_mac_82599EB &&
> > dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
> > ixgbe_setup_loopback_link_82599(hw);
> > -
> > }
> >
> > /*
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> > index eb89715..aeffb5f 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> > @@ -243,6 +243,16 @@ struct ixgbe_txq_ops {
> > IXGBE_ADVTXD_DCMD_DEXT |\
> > IXGBE_ADVTXD_DCMD_EOP)
> >
> > +int ixgbe_dev_txq_setup(struct rte_eth_dev *dev,
> > + uint16_t queue_idx, uint16_t offset,
> > + uint16_t nb_desc, unsigned int socket_id,
> > + const struct rte_eth_txconf *tx_conf);
> > +int ixgbe_dev_rxq_setup(struct rte_eth_dev *dev,
> > + uint16_t queue_idx, uint16_t offset,
> > + uint16_t nb_desc, unsigned int socket_id,
> > + const struct rte_eth_rxconf *rx_conf,
> > + struct rte_mempool *mp);
> > +
> > #ifdef RTE_IXGBE_INC_VECTOR
> > uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> > uint16_t nb_pkts);
> > --
> > 1.8.1.4
> >
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net device
2014-11-25 14:48 ` Liang, Cunming
@ 2014-11-25 15:01 ` Bruce Richardson
2014-11-26 8:22 ` Liang, Cunming
0 siblings, 1 reply; 24+ messages in thread
From: Bruce Richardson @ 2014-11-25 15:01 UTC (permalink / raw)
To: Liang, Cunming; +Cc: dev
On Tue, Nov 25, 2014 at 02:48:51PM +0000, Liang, Cunming wrote:
>
>
> > -----Original Message-----
> > From: Richardson, Bruce
> > Sent: Tuesday, November 25, 2014 10:34 PM
> > To: Liang, Cunming
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net device
> >
> > On Tue, Nov 25, 2014 at 10:11:22PM +0800, Cunming Liang wrote:
> > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > ---
> > > lib/librte_pmd_ixgbe/Makefile | 13 +-
> > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303
> > +++++++++++++++++++++++++++++++++
> > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++++
> > > lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 40 ++++-
> > > lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 ++
> > > 5 files changed, 415 insertions(+), 8 deletions(-)
> > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > >
> >
> > These changes are the ones that I'm not too sure about. I'd prefer if all
> > material for the bifurcated driver be kept within the librte_pmd_bifurc directory.
> [Liang, Cunming] I don't have a librte_pmd_bifurc library.
> So far, the purpose of librte_bifurc is device scan only; it is not used as a PMD.
> During driver probe, depending on the device id, it asks for the correct PMD from 'librte_pmd_ixgbe, librte_pmd_i40e'.
>
> > Is it possible to leave ixgbe largely unmodified and simply have the new
> > bifurcated driver pull in the needed ixgbe (and later i40e) functions at
> > compile time i.e. refer from one Makefile to the sources in the other
> > driver's directory?
> [Liang, Cunming] Nice point. Suppose we have a single directory gathering all direct ring access code,
> e.g. one called "librte_pmd_bifurc"; inside it, we'll have bifurc_ixgbe, bifurc_i40e, ...
> Each of them still depends on other libraries like librte_pmd_ixgbe/librte_pmd_i40e.
> We may remove the internal dependency inside one PMD driver, but we add more dependencies between libraries.
I'm not sure about all that. Two points:
* Why would we need separate subdirectories within the bifurcated driver directory?
The *only* thing that is different between an implementation of ixgbe and i40e to
use the bifurcated driver infrastructure is the code to map between NIC descriptors
and rte_mbufs. All the other code would be identical as far as I can work out. So the
only two routines that differ are going to be the rx_burst and tx_burst functions.
So why not just pull in those two specific functions (or sets of functions) from
their respective drivers, and keep the rest of the codebase common? It's surely
simpler than requiring the ixgbe driver to be aware of whether it's operating
in bifurcated mode or uio/vfio/nic_uio mode, and to check which operations are supported
or not.
* It's not really an inter-library dependency - or at least not a hugely problematic
one to my mind. With my proposal there is no need for the ixgbe or i40e drivers to
be compiled up for the bifurcated driver to work with them. It simply makes use
of the rx and tx code functions to do the mapping from descriptors to mbufs. While
there will be a dependency on those functions, the nice thing is that those functions
are already standardized by the ethdev API, so we don't need to worry about
internal changes inside the drivers changing the APIs of those functions.
/Bruce
>
> > My thinking is that the bifurcated driver is so significantly different in
> > the way it works, and the limits on it's functionality e.g. no direct filter
> > support or queue management, that it's best kept completely separate and only
> > "borrow" the needed descriptor read/write functions from the other drivers as is
> > needed.
> >
> > Just my 2c. I'm curious as to what others think.
> >
> > /Bruce
> >
> > > diff --git a/lib/librte_pmd_ixgbe/Makefile b/lib/librte_pmd_ixgbe/Makefile
> > > index 3588047..6867f17 100644
> > > --- a/lib/librte_pmd_ixgbe/Makefile
> > > +++ b/lib/librte_pmd_ixgbe/Makefile
> > > @@ -37,7 +37,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
> > > LIB = librte_pmd_ixgbe.a
> > >
> > > CFLAGS += -O3
> > > -CFLAGS += $(WERROR_FLAGS)
> > > +CFLAGS += $(WERROR_FLAGS) -Wno-cast-qual
> > >
> > > ifeq ($(CC), icc)
> > > #
> > > @@ -108,10 +108,21 @@ SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) +=
> > ixgbe_bypass.c
> > > SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_82599_bypass.c
> > > endif
> > >
> > > +ifeq ($(CONFIG_RTE_LIBRTE_BIFURC),y)
> > > +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
> > > +SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_bifurcate.c
> > > +endif
> > > +endif
> > >
> > > # this lib depends upon:
> > > DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_eal
> > lib/librte_ether
> > > DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_mempool
> > lib/librte_mbuf
> > > DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_net
> > lib/librte_malloc
> > > +ifeq ($(CONFIG_RTE_LIBRTE_BIFURC),y)
> > > +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
> > > +DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_bifurc
> > > +endif
> > > +endif
> > > +
> > >
> > > include $(RTE_SDK)/mk/rte.lib.mk
> > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > new file mode 100644
> > > index 0000000..84c445a
> > > --- /dev/null
> > > +++ b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > @@ -0,0 +1,303 @@
> > > +/*-
> > > + * BSD LICENSE
> > > + *
> > > + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> > > + * All rights reserved.
> > > + *
> > > + * Redistribution and use in source and binary forms, with or without
> > > + * modification, are permitted provided that the following conditions
> > > + * are met:
> > > + *
> > > + * * Redistributions of source code must retain the above copyright
> > > + * notice, this list of conditions and the following disclaimer.
> > > + * * Redistributions in binary form must reproduce the above copyright
> > > + * notice, this list of conditions and the following disclaimer in
> > > + * the documentation and/or other materials provided with the
> > > + * distribution.
> > > + * * Neither the name of Intel Corporation nor the names of its
> > > + * contributors may be used to endorse or promote products derived
> > > + * from this software without specific prior written permission.
> > > + *
> > > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> > CONTRIBUTORS
> > > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> > NOT
> > > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> > FITNESS FOR
> > > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> > COPYRIGHT
> > > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> > INCIDENTAL,
> > > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> > NOT
> > > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> > OF USE,
> > > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> > ON ANY
> > > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> > THE USE
> > > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> > DAMAGE.
> > > + */
> > > +
> > > +#include <rte_eal.h>
> > > +#include <rte_malloc.h>
> > > +#include <rte_memzone.h>
> > > +#include <rte_dev.h>
> > > +#include <rte_pci.h>
> > > +#include <rte_ethdev.h>
> > > +#include <rte_kvargs.h>
> > > +#include <rte_bifurc.h>
> > > +
> > > +#include <linux/if_ether.h>
> > > +#include <linux/if_packet.h>
> > > +#include <arpa/inet.h>
> > > +#include <net/if.h>
> > > +#include <sys/types.h>
> > > +#include <sys/socket.h>
> > > +#include <sys/ioctl.h>
> > > +#include <sys/mman.h>
> > > +#include <unistd.h>
> > > +#include <poll.h>
> > > +#include <errno.h>
> > > +
> > > +#include "ixgbe_logs.h"
> > > +#include "ixgbe_ethdev.h"
> > > +#include "ixgbe/ixgbe_api.h"
> > > +
> > > +#include <rte_mbuf.h>
> > > +#include <rte_bifurc.h>
> > > +#include "ixgbe_rxtx.h"
> > > +
> > > +static int
> > > +ixgbe_dev_bfc_configure(struct rte_eth_dev *dev __rte_unused)
> > > +{
> > > + return 0;
> > > +}
> > > +
> > > +static void
> > > +ixgbe_dev_bfc_info(struct rte_eth_dev *dev,
> > > + struct rte_eth_dev_info *dev_info)
> > > +{
> > > + rte_bifurc_ethdev_get_info(dev, dev_info);
> > > +}
> > > +
> > > +static void
> > > +ixgbe_dev_bfc_stats_get(__rte_unused struct rte_eth_dev *dev,
> > > + __rte_unused struct rte_eth_stats *igb_stats)
> > > +{
> > > + return;
> > > +}
> > > +
> > > +static int
> > > +ixgbe_dev_bfc_start(struct rte_eth_dev *dev)
> > > +{
> > > + int err;
> > > +
> > > + /* initialize transmission unit */
> > > + ixgbe_dev_tx_init(dev);
> > > +
> > > + /* This can fail when allocating mbufs for descriptor rings */
> > > + err = ixgbe_dev_rx_init(dev);
> > > + if (err) {
> > > + PMD_INIT_LOG(ERR, "Unable to initialize RX hardware\n");
> > > + goto error;
> > > + }
> > > +
> > > + ixgbe_dev_rxtx_start(dev);
> > > +
> > > + return 0;
> > > +
> > > +error:
> > > + PMD_INIT_LOG(ERR, "failure in ixgbe_dev_start(): %d", err);
> > > + ixgbe_dev_clear_queues(dev);
> > > + return -EIO;
> > > +}
> > > +
> > > +static void
> > > +ixgbe_dev_bfc_stop(struct rte_eth_dev *dev)
> > > +{
> > > + unsigned i;
> > > +
> > > + PMD_INIT_FUNC_TRACE();
> > > +
> > > + for (i = 0; i < dev->data->nb_tx_queues; i++)
> > > + ixgbe_dev_tx_queue_stop(dev, i);
> > > +
> > > + for (i = 0; i < dev->data->nb_rx_queues; i++)
> > > + ixgbe_dev_rx_queue_stop(dev, i);
> > > +}
> > > +
> > > +static void
> > > +ixgbe_dev_bfc_close(struct rte_eth_dev *dev)
> > > +{
> > > + ixgbe_dev_bfc_stop(dev);
> > > +
> > > + rte_bifurc_ethdev_free(dev);
> > > +}
> > > +
> > > +static inline int
> > > +rte_ixgbe_dev_atomic_write_link_status(struct rte_eth_dev *dev,
> > > + struct rte_eth_link *link)
> > > +{
> > > + struct rte_eth_link *dst = &(dev->data->dev_link);
> > > + struct rte_eth_link *src = link;
> > > +
> > > + if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
> > > + *(uint64_t *)src) == 0)
> > > + return -1;
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static int
> > > +ixgbe_dev_bfc_link_update(__rte_unused struct rte_eth_dev *dev,
> > > + __rte_unused int wait_to_complete)
> > > +{
> > > + struct rte_eth_link link;
> > > +
> > > + link.link_status = 1;
> > > + link.link_duplex = ETH_LINK_FULL_DUPLEX;
> > > + link.link_speed = ETH_LINK_SPEED_10000;
> > > +
> > > + rte_ixgbe_dev_atomic_write_link_status(dev, &link);
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static int
> > > +ixgbe_dev_bfc_rx_queue_setup(struct rte_eth_dev *dev,
> > > + uint16_t queue_idx,
> > > + uint16_t nb_desc,
> > > + unsigned int socket_id,
> > > + const struct rte_eth_rxconf *rx_conf,
> > > + struct rte_mempool *mp)
> > > +{
> > > + uint16_t offset = rte_bifurc_qp_base(dev);
> > > + return ixgbe_dev_rxq_setup(dev, queue_idx, offset, nb_desc,
> > > + socket_id, rx_conf, mp);
> > > +}
> > > +
> > > +static int
> > > +ixgbe_dev_bfc_tx_queue_setup(struct rte_eth_dev *dev,
> > > + uint16_t queue_idx,
> > > + uint16_t nb_desc,
> > > + unsigned int socket_id,
> > > + const struct rte_eth_txconf *tx_conf)
> > > +{
> > > + uint16_t offset = rte_bifurc_qp_base(dev);
> > > + return ixgbe_dev_txq_setup(dev, queue_idx, offset,
> > > + nb_desc, socket_id, tx_conf);
> > > +}
> > > +
> > > +static struct eth_dev_ops ixgbe_bifurc_ops = {
> > > + .dev_start = ixgbe_dev_bfc_start,
> > > + .dev_stop = ixgbe_dev_bfc_stop,
> > > + .dev_close = ixgbe_dev_bfc_close,
> > > + .dev_configure = ixgbe_dev_bfc_configure,
> > > + .dev_infos_get = ixgbe_dev_bfc_info,
> > > + .rx_queue_setup = ixgbe_dev_bfc_rx_queue_setup,
> > > + .tx_queue_setup = ixgbe_dev_bfc_tx_queue_setup,
> > > + .rx_queue_release = ixgbe_dev_rx_queue_release,
> > > + .tx_queue_release = ixgbe_dev_tx_queue_release,
> > > + .link_update = ixgbe_dev_bfc_link_update,
> > > + .stats_get = ixgbe_dev_bfc_stats_get,
> > > + .stats_reset = NULL,
> > > +};
> > > +
> > > +static int
> > > +eth_ixgbe_bifurc_dev_init(struct eth_driver *eth_drv __rte_unused,
> > > + struct rte_eth_dev *eth_dev)
> > > +{
> > > + struct rte_pci_device *pci_dev;
> > > + struct ixgbe_hw *hw =
> > > + IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
> > > + int diag;
> > > +
> > > + PMD_INIT_FUNC_TRACE();
> > > +
> > > + eth_dev->dev_ops = &ixgbe_bifurc_ops;
> > > + eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
> > > + eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
> > > +
> > > + /* for secondary processes, we don't initialise any further as primary
> > > + * has already done this work. Only check we don't need a different
> > > + * RX function */
> > > + if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> > > + if (eth_dev->data->scattered_rx)
> > > + eth_dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
> > > + return 0;
> > > + }
> > > + pci_dev = eth_dev->pci_dev;
> > > +
> > > + /* Vendor and Device ID need to be set before init of shared code */
> > > + hw->device_id = pci_dev->id.device_id;
> > > + hw->vendor_id = pci_dev->id.vendor_id;
> > > + hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
> > > +
> > > +#ifdef RTE_LIBRTE_IXGBE_ALLOW_UNSUPPORTED_SFP
> > > + hw->allow_unsupported_sfp = 1;
> > > +#endif
> > > +
> > > + /* Initialize the shared code (base driver) */
> > > +#ifdef RTE_NIC_BYPASS
> > > + diag = ixgbe_bypass_init_shared_code(hw);
> > > +#else
> > > + diag = ixgbe_init_shared_code(hw);
> > > +#endif /* RTE_NIC_BYPASS */
> > > +
> > > + if (diag != IXGBE_SUCCESS) {
> > > + PMD_INIT_LOG(ERR, "Shared code init failed: %d", diag);
> > > + return -EIO;
> > > + }
> > > +
> > > + /* Allocate memory for storing MAC addresses */
> > > + eth_dev->data->mac_addrs = rte_zmalloc("ixgbe", ETHER_ADDR_LEN *
> > > + hw->mac.num_rar_entries, 0);
> > > + if (eth_dev->data->mac_addrs == NULL) {
> > > + PMD_INIT_LOG(ERR,
> > > + "Failed to allocate %u bytes needed to store "
> > > + "MAC addresses",
> > > + ETHER_ADDR_LEN * hw->mac.num_rar_entries);
> > > + return -ENOMEM;
> > > + }
> > > + rte_bifurc_mac_addr(eth_dev, &eth_dev->data->mac_addrs[0]);
> > > +
> > > + return diag;
> > > +}
> > > +
> > > +/*
> > > + * The set of PCI devices this driver supports
> > > + */
> > > +static struct rte_pci_id pci_id_ixgbe_map[] = {
> > > +
> > > +#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) {RTE_PCI_DEVICE(vend,
> > dev)},
> > > +#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev) {RTE_PCI_DEVICE(vend,
> > dev)},
> > > +#include "rte_pci_dev_ids.h"
> > > +
> > > +{ .vendor_id = 0, /* sentinel */ },
> > > +};
> > > +
> > > +static struct eth_driver rte_ixgbe_bifurc_pmd = {
> > > + {
> > > + .name = "rte_ixgbe_bifurc_pmd",
> > > + .id_table = pci_id_ixgbe_map,
> > > + .drv_flags = RTE_PCI_DRV_NEED_MAPPING |
> > > + RTE_PCI_DRV_BIFURC,
> > > + },
> > > + .eth_dev_init = eth_ixgbe_bifurc_dev_init,
> > > + .dev_private_size = sizeof(struct ixgbe_adapter),
> > > +};
> > > +
> > > +/*
> > > + * Driver initialization routine.
> > > + * Invoked once at EAL init time.
> > > + * Register itself as the [Poll Mode] Driver of PCI IXGBE devices.
> > > + */
> > > +static int
> > > +rte_ixgbe_bifurc_pmd_init(const char *name __rte_unused,
> > > + const char *params __rte_unused)
> > > +{
> > > + PMD_INIT_FUNC_TRACE();
> > > +
> > > + rte_eth_driver_register(&rte_ixgbe_bifurc_pmd);
> > > + return 0;
> > > +}
> > > +
> > > +static struct rte_driver rte_ixgbe_bifurc_driver = {
> > > + .type = PMD_PDEV,
> > > + .init = rte_ixgbe_bifurc_pmd_init,
> > > +};
> > > +
> > > +PMD_REGISTER_DRIVER(rte_ixgbe_bifurc_driver);
> > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > > new file mode 100644
> > > index 0000000..d40b21d
> > > --- /dev/null
> > > +++ b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > > @@ -0,0 +1,57 @@
> > > +/*-
> > > + * BSD LICENSE
> > > + *
> > > + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> > > + * All rights reserved.
> > > + *
> > > + * Redistribution and use in source and binary forms, with or without
> > > + * modification, are permitted provided that the following conditions
> > > + * are met:
> > > + *
> > > + * * Redistributions of source code must retain the above copyright
> > > + * notice, this list of conditions and the following disclaimer.
> > > + * * Redistributions in binary form must reproduce the above copyright
> > > + * notice, this list of conditions and the following disclaimer in
> > > + * the documentation and/or other materials provided with the
> > > + * distribution.
> > > + * * Neither the name of Intel Corporation nor the names of its
> > > + * contributors may be used to endorse or promote products derived
> > > + * from this software without specific prior written permission.
> > > + *
> > > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> > CONTRIBUTORS
> > > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> > NOT
> > > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> > FITNESS FOR
> > > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> > COPYRIGHT
> > > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> > INCIDENTAL,
> > > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> > NOT
> > > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> > OF USE,
> > > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> > ON ANY
> > > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> > THE USE
> > > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> > DAMAGE.
> > > + */
> > > +
> > > +#ifndef _IXGBE_BIFFURCATE_H_
> > > +#define _IXGBE_BIFFURCATE_H_
> > > +
> > > +#ifdef __cplusplus
> > > +extern "C" {
> > > +#endif
> > > +
> > > +#define IXGBE_82599_MAX_RX_QUEUES 128
> > > +
> > > +#define RTE_PMD_PACKET_RING_SPLITOFF_LOWER_LIMIT 32
> > > +#define RTE_PMD_PACKET_MAX_RING_PAIRS
> > IXGBE_82599_MAX_RX_QUEUES
> > > +
> > > +
> > > +/**
> > > + * For use by the EAL only. Called as part of EAL init to set up any dummy NICs
> > > + * configured on command line.
> > > + */
> > > +int rte_ixgbe_bfc_pmd_init(const char *name, const char *params);
> > > +
> > > +#ifdef __cplusplus
> > > +}
> > > +#endif
> > > +
> > > +#endif
> > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > index e240376..2d32907 100644
> > > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > @@ -100,6 +100,12 @@ rte_rxmbuf_alloc(struct rte_mempool *mp)
> > > return (m);
> > > }
> > >
> > > +static inline uint16_t
> > > +ixgbe_dev_queue_offset(struct rte_eth_dev *dev)
> > > +{
> > > + return (RTE_ETH_DEV_SRIOV(dev).active == 0) ?
> > > + 0 : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx;
> > > +}
> > >
> > > #if 1
> > > #define RTE_PMD_USE_PREFETCH
> > > @@ -1726,6 +1732,17 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev
> > *dev,
> > > unsigned int socket_id,
> > > const struct rte_eth_txconf *tx_conf)
> > > {
> > > + uint16_t offset = ixgbe_dev_queue_offset(dev);
> > > + return ixgbe_dev_txq_setup(dev, queue_idx, offset,
> > > + nb_desc, socket_id, tx_conf);
> > > +}
> > > +
> > > +int
> > > +ixgbe_dev_txq_setup(struct rte_eth_dev *dev,
> > > + uint16_t queue_idx, uint16_t offset,
> > > + uint16_t nb_desc, unsigned int socket_id,
> > > + const struct rte_eth_txconf *tx_conf)
> > > +{
> > > const struct rte_memzone *tz;
> > > struct igb_tx_queue *txq;
> > > struct ixgbe_hw *hw;
> > > @@ -1849,8 +1866,7 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev
> > *dev,
> > > txq->hthresh = tx_conf->tx_thresh.hthresh;
> > > txq->wthresh = tx_conf->tx_thresh.wthresh;
> > > txq->queue_id = queue_idx;
> > > - txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
> > > - queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
> > > + txq->reg_idx = queue_idx + offset;
> > > txq->port_id = dev->data->port_id;
> > > txq->txq_flags = tx_conf->txq_flags;
> > > txq->ops = &def_txq_ops;
> > > @@ -2083,6 +2099,18 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev
> > *dev,
> > > const struct rte_eth_rxconf *rx_conf,
> > > struct rte_mempool *mp)
> > > {
> > > + uint16_t offset = ixgbe_dev_queue_offset(dev);
> > > + return ixgbe_dev_rxq_setup(dev, queue_idx, offset, nb_desc,
> > > + socket_id, rx_conf, mp);
> > > +}
> > > +
> > > +int
> > > +ixgbe_dev_rxq_setup(struct rte_eth_dev *dev,
> > > + uint16_t queue_idx, uint16_t offset,
> > > + uint16_t nb_desc, unsigned int socket_id,
> > > + const struct rte_eth_rxconf *rx_conf,
> > > + struct rte_mempool *mp)
> > > +{
> > > const struct rte_memzone *rz;
> > > struct igb_rx_queue *rxq;
> > > struct ixgbe_hw *hw;
> > > @@ -2118,8 +2146,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev
> > *dev,
> > > rxq->nb_rx_desc = nb_desc;
> > > rxq->rx_free_thresh = rx_conf->rx_free_thresh;
> > > rxq->queue_id = queue_idx;
> > > - rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
> > > - queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
> > > + rxq->reg_idx = queue_idx + offset;
> > > rxq->port_id = dev->data->port_id;
> > > rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
> > > 0 : ETHER_CRC_LEN);
> > > @@ -3402,9 +3429,9 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
> > > uint32_t fctrl;
> > > uint32_t hlreg0;
> > > uint32_t maxfrs;
> > > - uint32_t srrctl;
> > > uint32_t rdrxctl;
> > > uint32_t rxcsum;
> > > + uint32_t srrctl;
> > > uint16_t buf_size;
> > > uint16_t i;
> > >
> > > @@ -3684,9 +3711,9 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
> > > struct ixgbe_hw *hw;
> > > struct igb_tx_queue *txq;
> > > struct igb_rx_queue *rxq;
> > > - uint32_t txdctl;
> > > uint32_t dmatxctl;
> > > uint32_t rxctrl;
> > > + uint32_t txdctl;
> > > uint16_t i;
> > >
> > > PMD_INIT_FUNC_TRACE();
> > > @@ -3731,7 +3758,6 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
> > > if (hw->mac.type == ixgbe_mac_82599EB &&
> > > dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
> > > ixgbe_setup_loopback_link_82599(hw);
> > > -
> > > }
> > >
> > > /*
> > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> > b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> > > index eb89715..aeffb5f 100644
> > > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> > > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> > > @@ -243,6 +243,16 @@ struct ixgbe_txq_ops {
> > > IXGBE_ADVTXD_DCMD_DEXT |\
> > > IXGBE_ADVTXD_DCMD_EOP)
> > >
> > > +int ixgbe_dev_txq_setup(struct rte_eth_dev *dev,
> > > + uint16_t queue_idx, uint16_t offset,
> > > + uint16_t nb_desc, unsigned int socket_id,
> > > + const struct rte_eth_txconf *tx_conf);
> > > +int ixgbe_dev_rxq_setup(struct rte_eth_dev *dev,
> > > + uint16_t queue_idx, uint16_t offset,
> > > + uint16_t nb_desc, unsigned int socket_id,
> > > + const struct rte_eth_rxconf *rx_conf,
> > > + struct rte_mempool *mp);
> > > +
> > > #ifdef RTE_IXGBE_INC_VECTOR
> > > uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> > > uint16_t nb_pkts);
> > > --
> > > 1.8.1.4
> > >
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net device
2014-11-25 15:01 ` Bruce Richardson
@ 2014-11-26 8:22 ` Liang, Cunming
2014-11-26 10:35 ` Bruce Richardson
0 siblings, 1 reply; 24+ messages in thread
From: Liang, Cunming @ 2014-11-26 8:22 UTC (permalink / raw)
To: Richardson, Bruce; +Cc: dev
Thanks, Bruce, for the valuable comments.
> -----Original Message-----
> From: Richardson, Bruce
> Sent: Tuesday, November 25, 2014 11:01 PM
> To: Liang, Cunming
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net device
>
> On Tue, Nov 25, 2014 at 02:48:51PM +0000, Liang, Cunming wrote:
> >
> >
> > > -----Original Message-----
> > > From: Richardson, Bruce
> > > Sent: Tuesday, November 25, 2014 10:34 PM
> > > To: Liang, Cunming
> > > Cc: dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net
> device
> > >
> > > On Tue, Nov 25, 2014 at 10:11:22PM +0800, Cunming Liang wrote:
> > > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > > ---
> > > > lib/librte_pmd_ixgbe/Makefile | 13 +-
> > > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303
> > > +++++++++++++++++++++++++++++++++
> > > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++++
> > > > lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 40 ++++-
> > > > lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 ++
> > > > 5 files changed, 415 insertions(+), 8 deletions(-)
> > > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > > >
> > >
> > > These changes are the ones that I'm not too sure about. I'd prefer if all
> > > material for the bifurcated driver be kept within the librte_pmd_bifurc
> directory.
> > [Liang, Cunming] I haven't a librte_pmd_bifurc library.
> > So far the purpose of librte_bifurc is for device scan, not used as a pmd.
> > During driver probe, depend on device id, it asks for correct pmd from
> 'librte_pmd_ixgbe, librte_pmd_i40e'.
> >
> > > Is it possible to leave ixgbe largely unmodified and simply have the new
> > > bifurcated driver pull in the needed ixgbe (and later i40e) functions at
> > > compile time i.e. refer from one Makefile to the sources in the other
> > > driver's directory?
> > [Liang, Cunming] Nice point. If we have single directory gathering all direct ring
> access.
> > e.g. We have aka "librte_pmd_bifurc", inside it, we'll have bifurc_ixgbe,
> bifurc_i40e, ...
> > Each of them still depend on other libraries like
> librte_pmd_ixgbe/librte_pmd_i40e.
> > We may remove the internal dependence inside one pmd driver, but between
> libraries we add more.
>
> I'm not sure about all that. Two points:
>
> * Why would we need separate subdirectories within the bifurcated driver
> directory?
> The *only* thing that is different between an implementation of ixgbe and i40e
> to
> use the bifurcated driver infrastructure is the code to map between NIC
> descriptors
> and rte_mbufs. All the other code would be identical as far as I can work out. So
> the
> only two routines that differ are going to be the rx_burst and tx_burst functions.
[Liang, Cunming] Not really. If we are not using the fake page, we need to provide init/start/stop on a case-by-case basis.
> So why not just pull in those two specific functions (or sets of functions) from
> their respective drivers, and keep the rest of the codebase common?
[Liang, Cunming] I'm not sure all the rest of the codebase can be common.
For rx/tx and queue_setup we know it can; we already do that in xxx_rxtx.c. For other ops, it may not be possible.
Even for the parts that can be shared, if we provide such a common method template, it looks like we would still need to register 'ops'
(e.g. xxx_init_shared_code, xxx_dev_tx/rx_init, xxx_dev_rxtx_start), since they're not part of eth_dev_ops.
Suppose we go further and enable all the other DPDK ethdev APIs (by using ioctl, as ethtool does):
that message wrapping and translation is definitely something to put into such common code.
So I agree with the idea of putting more common methods into librte_bifurc,
but I don't think it's good to make it a common PMD driver.
I still prefer ixgbe_bifurc.c in librte_pmd_ixgbe as an independent driver.
As for sharing the codebase, the common rx/tx code is already in xxx_rxtx.c.
Other common methods would be provided by librte_bifurc and used by each specific PMD.
> simpler than having the ixgbe driver having to be aware of whether it's operating
> in bifurcated mode or uio/vfio/nic_uio mode, to check what operations are
> supported
> or not.
[Liang, Cunming] If you go through the code, you'll find that the ixgbe driver does not need to be aware of these modes.
We already have the ixgbe driver and the ixgbevf driver; now we add an ixgbe_bifurc driver, that's it.
BTW, ideally ixgbe and ixgbevf would each be in a self-contained .c file; at the moment both are in ixgbe_ethdev.c.
ixgbe_bifurc has weaker NIC control than ixgbevf; both are mainly focused on rx and tx.
ixgbe has full HW control, ixgbevf has limited HW control, and ixgbe_bifurc has no HW control.
All of them have the same capability to do rx and tx.
From this point of view, it makes sense to have such a standalone driver.
>
> * It's not really an inter-library dependency - or at least not a hugely problematic
> one to my mind. With my proposal there is no need for the ixgbe or i40e drivers
> to
> be compiled up for the bifurcated driver to work with them. It simply makes use
> of the rx and tx code functions to do the mapping from descriptors to mbufs.
> While
> there will be a dependency on those functions, the nice thing is that those
> functions
> are already standardized by the ethdev API, so we don't need to worry about
> internal changes inside the drivers changing the APIs of those functions.
[Liang, Cunming] I think I haven't fully got your point.
Do you propose that we don't need a specific bifurc PMD, and instead provide one driver directly on top of all the other PMDs?
We would expose more low-level functions to the ethdev API as needed.
Done that way, there's a risk: we would be assuming the kernel always guarantees that illegal register accesses only go to the fake pages.
If it doesn't, the effect of such register accesses by a normal PMD is unpredictable.
>
> /Bruce
>
>
> >
> > > My thinking is that the bifurcated driver is so significantly different in
> > > the way it works, and the limits on it's functionality e.g. no direct filter
> > > support or queue management, that it's best kept completely separate and
> only
> > > "borrow" the needed descriptor read/write functions from the other drivers
> as is
> > > needed.
> > >
> > > Just my 2c. I'm curious as to what others think.
> > >
> > > /Bruce
> > >
> > > > diff --git a/lib/librte_pmd_ixgbe/Makefile b/lib/librte_pmd_ixgbe/Makefile
> > > > index 3588047..6867f17 100644
> > > > --- a/lib/librte_pmd_ixgbe/Makefile
> > > > +++ b/lib/librte_pmd_ixgbe/Makefile
> > > > @@ -37,7 +37,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
> > > > LIB = librte_pmd_ixgbe.a
> > > >
> > > > CFLAGS += -O3
> > > > -CFLAGS += $(WERROR_FLAGS)
> > > > +CFLAGS += $(WERROR_FLAGS) -Wno-cast-qual
> > > >
> > > > ifeq ($(CC), icc)
> > > > #
> > > > @@ -108,10 +108,21 @@ SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) +=
> > > ixgbe_bypass.c
> > > > SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_82599_bypass.c
> > > > endif
> > > >
> > > > +ifeq ($(CONFIG_RTE_LIBRTE_BIFURC),y)
> > > > +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
> > > > +SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_bifurcate.c
> > > > +endif
> > > > +endif
> > > >
> > > > # this lib depends upon:
> > > > DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_eal
> > > lib/librte_ether
> > > > DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_mempool
> > > lib/librte_mbuf
> > > > DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_net
> > > lib/librte_malloc
> > > > +ifeq ($(CONFIG_RTE_LIBRTE_BIFURC),y)
> > > > +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
> > > > +DEPDIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += lib/librte_bifurc
> > > > +endif
> > > > +endif
> > > > +
> > > >
> > > > include $(RTE_SDK)/mk/rte.lib.mk
> > > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > > new file mode 100644
> > > > index 0000000..84c445a
> > > > --- /dev/null
> > > > +++ b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > > @@ -0,0 +1,303 @@
> > > > +/*-
> > > > + * BSD LICENSE
> > > > + *
> > > > + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> > > > + * All rights reserved.
> > > > + *
> > > > + * Redistribution and use in source and binary forms, with or without
> > > > + * modification, are permitted provided that the following conditions
> > > > + * are met:
> > > > + *
> > > > + * * Redistributions of source code must retain the above copyright
> > > > + * notice, this list of conditions and the following disclaimer.
> > > > + * * Redistributions in binary form must reproduce the above copyright
> > > > + * notice, this list of conditions and the following disclaimer in
> > > > + * the documentation and/or other materials provided with the
> > > > + * distribution.
> > > > + * * Neither the name of Intel Corporation nor the names of its
> > > > + * contributors may be used to endorse or promote products derived
> > > > + * from this software without specific prior written permission.
> > > > + *
> > > > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> > > CONTRIBUTORS
> > > > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> > > NOT
> > > > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> > > FITNESS FOR
> > > > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> > > COPYRIGHT
> > > > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> > > INCIDENTAL,
> > > > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> BUT
> > > NOT
> > > > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> LOSS
> > > OF USE,
> > > > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND
> > > ON ANY
> > > > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> > > > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> OF
> > > THE USE
> > > > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> > > DAMAGE.
> > > > + */
> > > > +
> > > > +#include <rte_eal.h>
> > > > +#include <rte_malloc.h>
> > > > +#include <rte_memzone.h>
> > > > +#include <rte_dev.h>
> > > > +#include <rte_pci.h>
> > > > +#include <rte_ethdev.h>
> > > > +#include <rte_kvargs.h>
> > > > +#include <rte_bifurc.h>
> > > > +
> > > > +#include <linux/if_ether.h>
> > > > +#include <linux/if_packet.h>
> > > > +#include <arpa/inet.h>
> > > > +#include <net/if.h>
> > > > +#include <sys/types.h>
> > > > +#include <sys/socket.h>
> > > > +#include <sys/ioctl.h>
> > > > +#include <sys/mman.h>
> > > > +#include <unistd.h>
> > > > +#include <poll.h>
> > > > +#include <errno.h>
> > > > +
> > > > +#include "ixgbe_logs.h"
> > > > +#include "ixgbe_ethdev.h"
> > > > +#include "ixgbe/ixgbe_api.h"
> > > > +
> > > > +#include <rte_mbuf.h>
> > > > +#include <rte_bifurc.h>
> > > > +#include "ixgbe_rxtx.h"
> > > > +
> > > > +static int
> > > > +ixgbe_dev_bfc_configure(struct rte_eth_dev *dev __rte_unused)
> > > > +{
> > > > + return 0;
> > > > +}
> > > > +
> > > > +static void
> > > > +ixgbe_dev_bfc_info(struct rte_eth_dev *dev,
> > > > + struct rte_eth_dev_info *dev_info)
> > > > +{
> > > > + rte_bifurc_ethdev_get_info(dev, dev_info);
> > > > +}
> > > > +
> > > > +static void
> > > > +ixgbe_dev_bfc_stats_get(__rte_unused struct rte_eth_dev *dev,
> > > > + __rte_unused struct rte_eth_stats *igb_stats)
> > > > +{
> > > > + return;
> > > > +}
> > > > +
> > > > +static int
> > > > +ixgbe_dev_bfc_start(struct rte_eth_dev *dev)
> > > > +{
> > > > + int err;
> > > > +
> > > > + /* initialize transmission unit */
> > > > + ixgbe_dev_tx_init(dev);
> > > > +
> > > > + /* This can fail when allocating mbufs for descriptor rings */
> > > > + err = ixgbe_dev_rx_init(dev);
> > > > + if (err) {
> > > > + PMD_INIT_LOG(ERR, "Unable to initialize RX hardware\n");
> > > > + goto error;
> > > > + }
> > > > +
> > > > + ixgbe_dev_rxtx_start(dev);
> > > > +
> > > > + return 0;
> > > > +
> > > > +error:
> > > > + PMD_INIT_LOG(ERR, "failure in ixgbe_dev_start(): %d", err);
> > > > + ixgbe_dev_clear_queues(dev);
> > > > + return -EIO;
> > > > +}
> > > > +
> > > > +static void
> > > > +ixgbe_dev_bfc_stop(struct rte_eth_dev *dev)
> > > > +{
> > > > + unsigned i;
> > > > +
> > > > + PMD_INIT_FUNC_TRACE();
> > > > +
> > > > + for (i = 0; i < dev->data->nb_tx_queues; i++)
> > > > + ixgbe_dev_tx_queue_stop(dev, i);
> > > > +
> > > > + for (i = 0; i < dev->data->nb_rx_queues; i++)
> > > > + ixgbe_dev_rx_queue_stop(dev, i);
> > > > +}
> > > > +
> > > > +static void
> > > > +ixgbe_dev_bfc_close(struct rte_eth_dev *dev)
> > > > +{
> > > > + ixgbe_dev_bfc_stop(dev);
> > > > +
> > > > + rte_bifurc_ethdev_free(dev);
> > > > +}
> > > > +
> > > > +static inline int
> > > > +rte_ixgbe_dev_atomic_write_link_status(struct rte_eth_dev *dev,
> > > > + struct rte_eth_link *link)
> > > > +{
> > > > + struct rte_eth_link *dst = &(dev->data->dev_link);
> > > > + struct rte_eth_link *src = link;
> > > > +
> > > > + if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
> > > > + *(uint64_t *)src) == 0)
> > > > + return -1;
> > > > +
> > > > + return 0;
> > > > +}
> > > > +
> > > > +static int
> > > > +ixgbe_dev_bfc_link_update(__rte_unused struct rte_eth_dev *dev,
> > > > + __rte_unused int wait_to_complete)
> > > > +{
> > > > + struct rte_eth_link link;
> > > > +
> > > > + link.link_status = 1;
> > > > + link.link_duplex = ETH_LINK_FULL_DUPLEX;
> > > > + link.link_speed = ETH_LINK_SPEED_10000;
> > > > +
> > > > + rte_ixgbe_dev_atomic_write_link_status(dev, &link);
> > > > +
> > > > + return 0;
> > > > +}
> > > > +
> > > > +static int
> > > > +ixgbe_dev_bfc_rx_queue_setup(struct rte_eth_dev *dev,
> > > > + uint16_t queue_idx,
> > > > + uint16_t nb_desc,
> > > > + unsigned int socket_id,
> > > > + const struct rte_eth_rxconf *rx_conf,
> > > > + struct rte_mempool *mp)
> > > > +{
> > > > + uint16_t offset = rte_bifurc_qp_base(dev);
> > > > + return ixgbe_dev_rxq_setup(dev, queue_idx, offset, nb_desc,
> > > > + socket_id, rx_conf, mp);
> > > > +}
> > > > +
> > > > +static int
> > > > +ixgbe_dev_bfc_tx_queue_setup(struct rte_eth_dev *dev,
> > > > + uint16_t queue_idx,
> > > > + uint16_t nb_desc,
> > > > + unsigned int socket_id,
> > > > + const struct rte_eth_txconf *tx_conf)
> > > > +{
> > > > + uint16_t offset = rte_bifurc_qp_base(dev);
> > > > + return ixgbe_dev_txq_setup(dev, queue_idx, offset,
> > > > + nb_desc, socket_id, tx_conf);
> > > > +}
> > > > +
> > > > +static struct eth_dev_ops ixgbe_bifurc_ops = {
> > > > + .dev_start = ixgbe_dev_bfc_start,
> > > > + .dev_stop = ixgbe_dev_bfc_stop,
> > > > + .dev_close = ixgbe_dev_bfc_close,
> > > > + .dev_configure = ixgbe_dev_bfc_configure,
> > > > + .dev_infos_get = ixgbe_dev_bfc_info,
> > > > + .rx_queue_setup = ixgbe_dev_bfc_rx_queue_setup,
> > > > + .tx_queue_setup = ixgbe_dev_bfc_tx_queue_setup,
> > > > + .rx_queue_release = ixgbe_dev_rx_queue_release,
> > > > + .tx_queue_release = ixgbe_dev_tx_queue_release,
> > > > + .link_update = ixgbe_dev_bfc_link_update,
> > > > + .stats_get = ixgbe_dev_bfc_stats_get,
> > > > + .stats_reset = NULL,
> > > > +};
> > > > +
> > > > +static int
> > > > +eth_ixgbe_bifurc_dev_init(struct eth_driver *eth_drv __rte_unused,
> > > > + struct rte_eth_dev *eth_dev)
> > > > +{
> > > > + struct rte_pci_device *pci_dev;
> > > > + struct ixgbe_hw *hw =
> > > > + IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
> > > > + int diag;
> > > > +
> > > > + PMD_INIT_FUNC_TRACE();
> > > > +
> > > > + eth_dev->dev_ops = &ixgbe_bifurc_ops;
> > > > + eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
> > > > + eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
> > > > +
> > > > + /* for secondary processes, we don't initialise any further as primary
> > > > + * has already done this work. Only check we don't need a different
> > > > + * RX function */
> > > > + if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> > > > + if (eth_dev->data->scattered_rx)
> > > > + eth_dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
> > > > + return 0;
> > > > + }
> > > > + pci_dev = eth_dev->pci_dev;
> > > > +
> > > > + /* Vendor and Device ID need to be set before init of shared code */
> > > > + hw->device_id = pci_dev->id.device_id;
> > > > + hw->vendor_id = pci_dev->id.vendor_id;
> > > > + hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
> > > > +
> > > > +#ifdef RTE_LIBRTE_IXGBE_ALLOW_UNSUPPORTED_SFP
> > > > + hw->allow_unsupported_sfp = 1;
> > > > +#endif
> > > > +
> > > > + /* Initialize the shared code (base driver) */
> > > > +#ifdef RTE_NIC_BYPASS
> > > > + diag = ixgbe_bypass_init_shared_code(hw);
> > > > +#else
> > > > + diag = ixgbe_init_shared_code(hw);
> > > > +#endif /* RTE_NIC_BYPASS */
> > > > +
> > > > + if (diag != IXGBE_SUCCESS) {
> > > > + PMD_INIT_LOG(ERR, "Shared code init failed: %d", diag);
> > > > + return -EIO;
> > > > + }
> > > > +
> > > > + /* Allocate memory for storing MAC addresses */
> > > > + eth_dev->data->mac_addrs = rte_zmalloc("ixgbe", ETHER_ADDR_LEN *
> > > > + hw->mac.num_rar_entries, 0);
> > > > + if (eth_dev->data->mac_addrs == NULL) {
> > > > + PMD_INIT_LOG(ERR,
> > > > + "Failed to allocate %u bytes needed to store "
> > > > + "MAC addresses",
> > > > + ETHER_ADDR_LEN * hw->mac.num_rar_entries);
> > > > + return -ENOMEM;
> > > > + }
> > > > + rte_bifurc_mac_addr(eth_dev, &eth_dev->data->mac_addrs[0]);
> > > > +
> > > > + return diag;
> > > > +}
> > > > +
> > > > +/*
> > > > + * The set of PCI devices this driver supports
> > > > + */
> > > > +static struct rte_pci_id pci_id_ixgbe_map[] = {
> > > > +
> > > > +#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) {RTE_PCI_DEVICE(vend,
> > > dev)},
> > > > +#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev)
> {RTE_PCI_DEVICE(vend,
> > > dev)},
> > > > +#include "rte_pci_dev_ids.h"
> > > > +
> > > > +{ .vendor_id = 0, /* sentinel */ },
> > > > +};
> > > > +
> > > > +static struct eth_driver rte_ixgbe_bifurc_pmd = {
> > > > + {
> > > > + .name = "rte_ixgbe_bifurc_pmd",
> > > > + .id_table = pci_id_ixgbe_map,
> > > > + .drv_flags = RTE_PCI_DRV_NEED_MAPPING |
> > > > + RTE_PCI_DRV_BIFURC,
> > > > + },
> > > > + .eth_dev_init = eth_ixgbe_bifurc_dev_init,
> > > > + .dev_private_size = sizeof(struct ixgbe_adapter),
> > > > +};
> > > > +
> > > > +/*
> > > > + * Driver initialization routine.
> > > > + * Invoked once at EAL init time.
> > > > + * Register itself as the [Poll Mode] Driver of PCI IXGBE devices.
> > > > + */
> > > > +static int
> > > > +rte_ixgbe_bifurc_pmd_init(const char *name __rte_unused,
> > > > + const char *params __rte_unused)
> > > > +{
> > > > + PMD_INIT_FUNC_TRACE();
> > > > +
> > > > + rte_eth_driver_register(&rte_ixgbe_bifurc_pmd);
> > > > + return 0;
> > > > +}
> > > > +
> > > > +static struct rte_driver rte_ixgbe_bifurc_driver = {
> > > > + .type = PMD_PDEV,
> > > > + .init = rte_ixgbe_bifurc_pmd_init,
> > > > +};
> > > > +
> > > > +PMD_REGISTER_DRIVER(rte_ixgbe_bifurc_driver);
> > > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > > b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > > > new file mode 100644
> > > > index 0000000..d40b21d
> > > > --- /dev/null
> > > > +++ b/lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > > > @@ -0,0 +1,57 @@
> > > > +/*-
> > > > + * BSD LICENSE
> > > > + *
> > > > + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> > > > + * All rights reserved.
> > > > + *
> > > > + * Redistribution and use in source and binary forms, with or without
> > > > + * modification, are permitted provided that the following conditions
> > > > + * are met:
> > > > + *
> > > > + * * Redistributions of source code must retain the above copyright
> > > > + * notice, this list of conditions and the following disclaimer.
> > > > + * * Redistributions in binary form must reproduce the above copyright
> > > > + * notice, this list of conditions and the following disclaimer in
> > > > + * the documentation and/or other materials provided with the
> > > > + * distribution.
> > > > + * * Neither the name of Intel Corporation nor the names of its
> > > > + * contributors may be used to endorse or promote products derived
> > > > + * from this software without specific prior written permission.
> > > > + *
> > > > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> > > CONTRIBUTORS
> > > > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> > > NOT
> > > > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> > > FITNESS FOR
> > > > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> > > COPYRIGHT
> > > > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> > > INCIDENTAL,
> > > > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> BUT
> > > NOT
> > > > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> LOSS
> > > OF USE,
> > > > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND
> > > ON ANY
> > > > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> > > > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> OF
> > > THE USE
> > > > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> > > DAMAGE.
> > > > + */
> > > > +
> > > > +#ifndef _IXGBE_BIFFURCATE_H_
> > > > +#define _IXGBE_BIFFURCATE_H_
> > > > +
> > > > +#ifdef __cplusplus
> > > > +extern "C" {
> > > > +#endif
> > > > +
> > > > +#define IXGBE_82599_MAX_RX_QUEUES 128
> > > > +
> > > > +#define RTE_PMD_PACKET_RING_SPLITOFF_LOWER_LIMIT 32
> > > > +#define RTE_PMD_PACKET_MAX_RING_PAIRS
> > > IXGBE_82599_MAX_RX_QUEUES
> > > > +
> > > > +
> > > > +/**
> > > > + * For use by the EAL only. Called as part of EAL init to set up any dummy
> NICs
> > > > + * configured on command line.
> > > > + */
> > > > +int rte_ixgbe_bfc_pmd_init(const char *name, const char *params);
> > > > +
> > > > +#ifdef __cplusplus
> > > > +}
> > > > +#endif
> > > > +
> > > > +#endif
> > > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > > index e240376..2d32907 100644
> > > > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > > @@ -100,6 +100,12 @@ rte_rxmbuf_alloc(struct rte_mempool *mp)
> > > > return (m);
> > > > }
> > > >
> > > > +static inline uint16_t
> > > > +ixgbe_dev_queue_offset(struct rte_eth_dev *dev)
> > > > +{
> > > > + return (RTE_ETH_DEV_SRIOV(dev).active == 0) ?
> > > > + 0 : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx;
> > > > +}
> > > >
> > > > #if 1
> > > > #define RTE_PMD_USE_PREFETCH
> > > > @@ -1726,6 +1732,17 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev
> > > *dev,
> > > > unsigned int socket_id,
> > > > const struct rte_eth_txconf *tx_conf)
> > > > {
> > > > + uint16_t offset = ixgbe_dev_queue_offset(dev);
> > > > + return ixgbe_dev_txq_setup(dev, queue_idx, offset,
> > > > + nb_desc, socket_id, tx_conf);
> > > > +}
> > > > +
> > > > +int
> > > > +ixgbe_dev_txq_setup(struct rte_eth_dev *dev,
> > > > + uint16_t queue_idx, uint16_t offset,
> > > > + uint16_t nb_desc, unsigned int socket_id,
> > > > + const struct rte_eth_txconf *tx_conf)
> > > > +{
> > > > const struct rte_memzone *tz;
> > > > struct igb_tx_queue *txq;
> > > > struct ixgbe_hw *hw;
> > > > @@ -1849,8 +1866,7 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev
> > > *dev,
> > > > txq->hthresh = tx_conf->tx_thresh.hthresh;
> > > > txq->wthresh = tx_conf->tx_thresh.wthresh;
> > > > txq->queue_id = queue_idx;
> > > > - txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
> > > > - queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx +
> queue_idx);
> > > > + txq->reg_idx = queue_idx + offset;
> > > > txq->port_id = dev->data->port_id;
> > > > txq->txq_flags = tx_conf->txq_flags;
> > > > txq->ops = &def_txq_ops;
> > > > @@ -2083,6 +2099,18 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev
> > > *dev,
> > > > const struct rte_eth_rxconf *rx_conf,
> > > > struct rte_mempool *mp)
> > > > {
> > > > + uint16_t offset = ixgbe_dev_queue_offset(dev);
> > > > + return ixgbe_dev_rxq_setup(dev, queue_idx, offset, nb_desc,
> > > > + socket_id, rx_conf, mp);
> > > > +}
> > > > +
> > > > +int
> > > > +ixgbe_dev_rxq_setup(struct rte_eth_dev *dev,
> > > > + uint16_t queue_idx, uint16_t offset,
> > > > + uint16_t nb_desc, unsigned int socket_id,
> > > > + const struct rte_eth_rxconf *rx_conf,
> > > > + struct rte_mempool *mp)
> > > > +{
> > > > const struct rte_memzone *rz;
> > > > struct igb_rx_queue *rxq;
> > > > struct ixgbe_hw *hw;
> > > > @@ -2118,8 +2146,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev
> > > *dev,
> > > > rxq->nb_rx_desc = nb_desc;
> > > > rxq->rx_free_thresh = rx_conf->rx_free_thresh;
> > > > rxq->queue_id = queue_idx;
> > > > - rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
> > > > - queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx +
> queue_idx);
> > > > + rxq->reg_idx = queue_idx + offset;
> > > > rxq->port_id = dev->data->port_id;
> > > > rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
> > > > 0 : ETHER_CRC_LEN);
> > > > @@ -3402,9 +3429,9 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
> > > > uint32_t fctrl;
> > > > uint32_t hlreg0;
> > > > uint32_t maxfrs;
> > > > - uint32_t srrctl;
> > > > uint32_t rdrxctl;
> > > > uint32_t rxcsum;
> > > > + uint32_t srrctl;
> > > > uint16_t buf_size;
> > > > uint16_t i;
> > > >
> > > > @@ -3684,9 +3711,9 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
> > > > struct ixgbe_hw *hw;
> > > > struct igb_tx_queue *txq;
> > > > struct igb_rx_queue *rxq;
> > > > - uint32_t txdctl;
> > > > uint32_t dmatxctl;
> > > > uint32_t rxctrl;
> > > > + uint32_t txdctl;
> > > > uint16_t i;
> > > >
> > > > PMD_INIT_FUNC_TRACE();
> > > > @@ -3731,7 +3758,6 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
> > > > if (hw->mac.type == ixgbe_mac_82599EB &&
> > > > dev->data->dev_conf.lpbk_mode ==
> IXGBE_LPBK_82599_TX_RX)
> > > > ixgbe_setup_loopback_link_82599(hw);
> > > > -
> > > > }
> > > >
> > > > /*
> > > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> > > b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> > > > index eb89715..aeffb5f 100644
> > > > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> > > > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
> > > > @@ -243,6 +243,16 @@ struct ixgbe_txq_ops {
> > > > IXGBE_ADVTXD_DCMD_DEXT |\
> > > > IXGBE_ADVTXD_DCMD_EOP)
> > > >
> > > > +int ixgbe_dev_txq_setup(struct rte_eth_dev *dev,
> > > > + uint16_t queue_idx, uint16_t offset,
> > > > + uint16_t nb_desc, unsigned int socket_id,
> > > > + const struct rte_eth_txconf *tx_conf);
> > > > +int ixgbe_dev_rxq_setup(struct rte_eth_dev *dev,
> > > > + uint16_t queue_idx, uint16_t offset,
> > > > + uint16_t nb_desc, unsigned int socket_id,
> > > > + const struct rte_eth_rxconf *rx_conf,
> > > > + struct rte_mempool *mp);
> > > > +
> > > > #ifdef RTE_IXGBE_INC_VECTOR
> > > > uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> > > > uint16_t nb_pkts);
> > > > --
> > > > 1.8.1.4
> > > >
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net device
2014-11-26 8:22 ` Liang, Cunming
@ 2014-11-26 10:35 ` Bruce Richardson
0 siblings, 0 replies; 24+ messages in thread
From: Bruce Richardson @ 2014-11-26 10:35 UTC (permalink / raw)
To: Liang, Cunming; +Cc: dev
On Wed, Nov 26, 2014 at 08:22:05AM +0000, Liang, Cunming wrote:
> Thanks Bruce's valuable comments.
>
> > -----Original Message-----
> > From: Richardson, Bruce
> > Sent: Tuesday, November 25, 2014 11:01 PM
> > To: Liang, Cunming
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net device
> >
> > On Tue, Nov 25, 2014 at 02:48:51PM +0000, Liang, Cunming wrote:
> > >
> > >
> > > > -----Original Message-----
> > > > From: Richardson, Bruce
> > > > Sent: Tuesday, November 25, 2014 10:34 PM
> > > > To: Liang, Cunming
> > > > Cc: dev@dpdk.org
> > > > Subject: Re: [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net
> > device
> > > >
> > > > On Tue, Nov 25, 2014 at 10:11:22PM +0800, Cunming Liang wrote:
> > > > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > > > ---
> > > > > lib/librte_pmd_ixgbe/Makefile | 13 +-
> > > > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303
> > > > +++++++++++++++++++++++++++++++++
> > > > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++++
> > > > > lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 40 ++++-
> > > > > lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 ++
> > > > > 5 files changed, 415 insertions(+), 8 deletions(-)
> > > > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > > > >
> > > >
> > > > These changes are the ones that I'm not too sure about. I'd prefer if all
> > > > material for the bifurcated driver be kept within the librte_pmd_bifurc
> > directory.
> > > [Liang, Cunming] I haven't a librte_pmd_bifurc library.
> > > So far the purpose of librte_bifurc is for device scan, not used as a pmd.
> > > During driver probe, depend on device id, it asks for correct pmd from
> > 'librte_pmd_ixgbe, librte_pmd_i40e'.
> > >
> > > > Is it possible to leave ixgbe largely unmodified and simply have the new
> > > > bifurcated driver pull in the needed ixgbe (and later i40e) functions at
> > > > compile time i.e. refer from one Makefile to the sources in the other
> > > > driver's directory?
> > > [Liang, Cunming] Nice point. If we have single directory gathering all direct ring
> > access.
> > > e.g. We have aka "librte_pmd_bifurc", inside it, we'll have bifurc_ixgbe,
> > bifurc_i40e, ...
> > > Each of them still depend on other libraries like
> > librte_pmd_ixgbe/librte_pmd_i40e.
> > > We may remove the internal dependence inside one pmd driver, but between
> > libraries we add more.
> >
> > I'm not sure about all that. Two points:
> >
> > * Why would we need separate subdirectories within the bifurcated driver
> > directory?
> > The *only* thing that is different between an implementation of ixgbe and i40e
> > to
> > use the bifurcated driver infrastructure is the code to map between NIC
> > descriptors
> > and rte_mbufs. All the other code would be identical as far as I can work out. So
> > the
> > only two routines that differ are going to be the rx_burst and tx_burst functions.
> [Liang, Cunming] Not really. If not using the fake page, we need to provide init/start/stop case by case.
> > So why not just pull in those two specific functions (or sets of functions) from
> > their respective drivers, and keep the rest of the codebase common?
>
> [Liang, Cunming] I'm not sure all the rest of codebase can be common.
> For rx/tx or queue_setup, we know it can, we already do it in xxx_rxtx.c. For other ops, may not.
> Even for the part we can, if we provide such common method template, it looks like we still need to register 'ops'.
> (e.g. xxx_init_shared_code, xxx_dev_tx/rx_init, xxx_dev_rxtx_start) They're not part of eth_dev_ops.
> If we consider more like enable all other DPDK ethdev API (by using ioctl like ethtools does).
> These message wrap and translation are definitely the case to put into such common codes.
>
> So I agree with the idea to put more common method into librte_bifurc.
> But don't think it's good to make it as a common PMD driver.
> I still prefer ixgbe_bifurc.c in librte_pmd_ixgbe as an independent driver.
> Per codebase common, rxtx common stuffs already done in xxx_rxtx.c.
> Other common method provides by librte_bifurc, be used by each specific PMD.
>
> > simpler than having the ixgbe driver having to be aware of whether it's operating
> > in bifurcated mode or uio/vfio/nic_uio mode, to check what operations are
> > supported
> > or not.
> [Liang, Cunming] If you go through the code, you'll find that the ixgbe driver does not need to be aware of these modes.
> We already have the ixgbe driver and the ixgbevf driver, and now we have the ixgbe_bifurc driver; that's it.
> BTW, ideally ixgbe and ixgbevf would each be in a self-contained .c file; currently both are in ixgbe_ethdev.c.
> Ixgbe_bifurc has weaker NIC control than ixgbevf; both mainly focus on rx and tx.
> Ixgbe has full HW control, ixgbevf has limited HW control, and ixgbe_bifurc has no HW control.
> All of them have the same capability to do rx and tx.
> From this point of view, it makes sense to have such a standalone driver.
> >
> > * It's not really an inter-library dependency - or at least not a hugely problematic
> > one to my mind. With my proposal there is no need for the ixgbe or i40e drivers
> > to
> > be compiled up for the bifurcated driver to work with them. It simply makes use
> > of the rx and tx code functions to do the mapping from descriptors to mbufs.
> > While
> > there will be a dependency on those functions, the nice thing is that those
> > functions
> > are already standardized by the ethdev API, so we don't need to worry about
> > internal changes inside the drivers changing the APIs of those functions.
> [Liang, Cunming] I think I haven't fully got your point.
> Do you propose we don't need the specific PMD bifurc, instead to provide a driver directly on top of all other PMD ?
> We expose more low level function to ethdev API as needed.
> In this way, there's a risk that we assume kernel always guarantee the illegal register access only goes into the fake pages.
> If not, such register access by normal PMD is un-expectable.
>
My main thinking is that the ethdev HW APIs applicable to the bifurcated driver
are going to be very, very limited. Even though we can set up RX and TX queues
and perform RX and TX on them, everything else, as far as I can see, is
going to be controlled externally via ethtool access to the kernel. Therefore, it
seems to me that an i40e device in bifurcated mode has much more in common with
an ixgbe device in bifurcated mode than with an i40e device being directly
controlled by DPDK. For these reasons I think a single driver for all bifurcated
drivers makes more sense.
However, without having prototyped such a scheme one cannot be sure if it will
really work, so I'm curious what other people think is the best approach to
producing such a driver.
/Bruce
> >
> > /Bruce
> >
> >
> > >
> > > > My thinking is that the bifurcated driver is so significantly different in
> > > > the way it works, and the limits on it's functionality e.g. no direct filter
> > > > support or queue management, that it's best kept completely separate and
> > only
> > > > "borrow" the needed descriptor read/write functions from the other drivers
> > as is
> > > > needed.
> > > >
> > > > Just my 2c. I'm curious as to what others think.
> > > >
> > > > /Bruce
> > > >
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
2014-11-25 14:11 [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver Cunming Liang
` (5 preceding siblings ...)
2014-11-25 14:11 ` [dpdk-dev] [RFC PATCH 6/6] ixgbe: PMD for bifurc ixgbe net device Cunming Liang
@ 2014-11-25 14:23 ` Neil Horman
2014-11-25 14:29 ` Bruce Richardson
2015-04-09 3:43 ` 贾学涛
7 siblings, 1 reply; 24+ messages in thread
From: Neil Horman @ 2014-11-25 14:23 UTC (permalink / raw)
To: Cunming Liang; +Cc: dev
On Tue, Nov 25, 2014 at 10:11:16PM +0800, Cunming Liang wrote:
>
> This is a RFC patch set to support "bifurcated driver" in DPDK.
>
>
> What is "bifurcated driver"?
> ===========================
>
> The "bifurcated driver" stands for the kernel NIC driver that supports:
>
> 1. on-demand rx/tx queue pairs split-off and assignment to user space
>
> 2. direct NIC resource(e.g. rx/tx queue registers) access from user space
>
> 3. distributing packets to kernel or user space rx queues by
> NIC's flow director according to the filter rules
>
> Here's the kernel patch set to support.
> http://comments.gmane.org/gmane.linux.network/333615
>
>
> Usage scenario
> =================
>
> It's well accepted by industry to use DPDK to process fast path packets in
> user space in a high performance fashion, meanwhile processing slow path
> control packets in kernel space is still needed as those packets usually
> rely on in_kernel TCP/IP stacks and/or socket programming interface.
>
> KNI(Kernel NIC Interface) mechanism in DPDK is designed to meet this
> requirement, with below limitation:
>
> 1) Software classifies packets and distributes them to kernel via DPDK
> software rings, at the cost of significant CPU cycles and memory bandwidth.
>
> 2) Copying packets between the kernel's socket buffer and the mbuf has a
> significant negative impact on KNI performance.
>
> The bifurcated driver provides an alternative approach that not only offloads
> flow classification and distribution to the NIC but also supports packet zero_copy.
>
> User can use standard ethtool to add filter rules to the NIC in order to
> distribute specific flows to the queues only accessed by kernel driver and
> stack, and add other rules to distribute packets to the queues assigned to
> user-space.
>
> For those rx/tx queue pairs that directly accessed from user space,
> DPDK takes over the packets rx/tx as well as corresponding DMA operation
> for high performance packet I/O.
>
>
> What's the impact and change to DPDK
> ======================================
>
> DPDK usually binds PCIe NIC devices by leveraging the kernel's user space driver
> mechanism (UIO or VFIO) to map the NIC's entire PCIe I/O space to user space.
> The bifurcated driver PMD talks to a NIC interface using raw socket APIs and
> only mmap() limited I/O space (e.g. certain 4K pages) for accessing involved
> rx/tx queue pairs. So the impact and changes mainly comes with below:
>
> - netdev
> DPDK needs to create a af_packet socket and bind it to a bifurcated netdev.
> The socket fd will be used to request 'queue pairs info',
> 'split/return queue pairs' and etc. The PCIe device ID, netdev MAC address,
> numa info are also from the netdev response.
>
> - PCIe device scan and driver probe
> netdev provides the PCIe device ID information. Refer to the device ID,
> the correct driver should be used. And for such netdev device, the creation
> of PCIe device is no longer from scan but the on-demand assignment.
>
> - PCIe BAR mapping
> "bifurcated driver" maps several pages for the queue pairs.
> Others BAR register space maps to a fake page. The BAR mapping go through
> mmap on sockfd. Which is a little different from what UIO/VFIO does.
>
> - PMD
> The PMD will no longer really initialize and configure NIC.
> Instead, it only takes care the queue pair setup, rx_burst and tx_burst.
>
> The patch uses eal '--vdev' parameter to assign netdev iface name and number of
> queue pairs. Here's a example about how to configure the bifurcated driver and
> run DPDK testpmd with bifurcated PMD.
>
> 1. Set promisc mode
> > ifconfig eth0 promisc
>
> 2. Turn on fdir
> > ethtool -K eth0 ntuple on
>
> 3. Setup a flow director rule to distribute packets with source ip
> 0.0.0.0 to rxq No.0
> > ethtool -N eth0 flow-type udp4 src-ip 0.0.0.0 action 0
>
> 4. Run testpmd on netdev 'eth0' with 1 queue pair.
> > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 \
> > --vdev=rte_bifurc,iface=eth0,qpairs=1 -- \
> > -i --rxfreet=32 --txfreet=32 --txrst=32
> Note:
> iface and qpairs arguments above specify the netdev interface name and
> number of qpairs that user space request from the "bifurcated driver"
> respectively.
>
> 5. Setup a flow director rule to distribute packets with source ip
> 1.1.1.1 to rxq No.32. This needs to be done after testpmd starts.
> > ethtool -N eth0 flow-type udp4 src-ip 1.1.1.1 action 32
>
> Below illustrates the detailed changes in this patch set.
>
> eal
> --------
> The first two patches are all about the eal API declaration and Linux version
> definition to support af_packet socket and verbs of bifurcated netdev.
> Those APIs include the verbs like open, bind, (un)map, split/return, map_umem.
> And other APIs like set_pci, get_ifinfo and get/put_devargs which help to
> generate pci device from bifurcated netdev and get basic netdev info.
>
> The third patch is used to allow probing driver on the PCIe VDEV created from
> a NIC interface driven by "bifurcated driver". It defines a new flag
> 'RTE_PCI_DRV_BIFURC' used for direct ring access PMD.
>
> librte_bifurc
> ---------------
> The library is used as a VDEV bus driver to scan '--vdev=rte_bifurc' VDEV
> from eal command-line. It generates the PCIe VDEV device ready for further
> driver probe. It maintains the bifurcated device information include sockfd,
> hwaddr, mtu, qpairs, iface_name. It's used for other direct ring access PMD
> to apply for bifurcated device info.
>
> direct ring access PMD
> -------------------------
> The patch provides direct ring access PMD for ixgbe. Comparing to the normal
> PMD ixgbe, it uses 'RTE_PCI_DRV_BIFURC' flag during self registration.
> It mostly reuses the existing PMD ops to avoid re-implementing everything
> from scratch. And it also modifies the rx/tx_queue_setup to allow queue
> setup from any queue offset.
>
> Supported NIC driver
> ========================
>
> The "bifurcated driver" kernel patch only supports "ixgbe" driver at the moment,
> so this RFC patch also provides "ixgbe" PMD via direct-mapped rings as sample.
> The support for 40GE(i40e) will be added in the future.
>
> In addition, for other multi-queue NICs with flow director capability
> to perform packet classification and distribution, there is no particular
> technical gap preventing support for the bifurcated driver approach.
>
> Limitation
> ============
>
> By using "bifurcated driver", user space only takes over the DMA operation.
> NIC configuration settings are outside the control of the user space PMD.
> All NIC settings, including adding/deleting filter rules, need to be done with
> standard Linux network tools (e.g. ethtool).
> So feature support really depends on how much is supported by ethtool.
>
>
> Any questions, comments and feedback are welcome.
>
>
> -END-
>
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
>
> *** BLURB HERE ***
>
> Cunming Liang (6):
> eal: common direct ring access API
> eal: direct ring access support by linux af_packet
> pci: allow VDEV as pci device during device driver probe
> bifurc: add driver to scan bifurcated netdev
> ixgbe: rx/tx queue stop bug fix
> ixgbe: PMD for bifurc ixgbe net device
>
> config/common_linuxapp | 5 +
> lib/Makefile | 1 +
> lib/librte_bifurc/Makefile | 58 +++++
> lib/librte_bifurc/rte_bifurc.c | 284 +++++++++++++++++++++
> lib/librte_bifurc/rte_bifurc.h | 90 +++++++
> lib/librte_eal/common/Makefile | 5 +
> lib/librte_eal/common/include/rte_pci.h | 4 +
> lib/librte_eal/common/include/rte_pci_bifurc.h | 186 ++++++++++++++
> lib/librte_eal/linuxapp/eal/Makefile | 1 +
> lib/librte_eal/linuxapp/eal/eal_pci.c | 42 ++--
> lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c | 336 +++++++++++++++++++++++++
> lib/librte_ether/rte_ethdev.c | 3 +-
> lib/librte_pmd_ixgbe/Makefile | 13 +-
> lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303 ++++++++++++++++++++++
> lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 44 +++-
> lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 +
> mk/rte.app.mk | 6 +
> 18 files changed, 1421 insertions(+), 27 deletions(-)
> create mode 100644 lib/librte_bifurc/Makefile
> create mode 100644 lib/librte_bifurc/rte_bifurc.c
> create mode 100644 lib/librte_bifurc/rte_bifurc.h
> create mode 100644 lib/librte_eal/common/include/rte_pci_bifurc.h
> create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
> create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
>
> --
> 1.8.1.4
>
>
AIUI, the bifurcated driver hasn't yet been accepted upstream, has it? Given
that, I don't think it's wise to pull this in yet ahead of the kernel work, as
there may still be kernel side changes that the user space pmd will have to
adapt to.
Neil
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
2014-11-25 14:23 ` [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver Neil Horman
@ 2014-11-25 14:29 ` Bruce Richardson
2014-11-25 14:40 ` Liang, Cunming
2014-11-25 14:57 ` Walukiewicz, Miroslaw
0 siblings, 2 replies; 24+ messages in thread
From: Bruce Richardson @ 2014-11-25 14:29 UTC (permalink / raw)
To: Neil Horman; +Cc: dev
On Tue, Nov 25, 2014 at 09:23:16AM -0500, Neil Horman wrote:
> On Tue, Nov 25, 2014 at 10:11:16PM +0800, Cunming Liang wrote:
> >
> > This is a RFC patch set to support "bifurcated driver" in DPDK.
> >
> >
> > What is "bifurcated driver"?
> > ===========================
> >
> > The "bifurcated driver" stands for the kernel NIC driver that supports:
> >
> > 1. on-demand rx/tx queue pairs split-off and assignment to user space
> >
> > 2. direct NIC resource(e.g. rx/tx queue registers) access from user space
> >
> > 3. distributing packets to kernel or user space rx queues by
> > NIC's flow director according to the filter rules
> >
> > Here's the kernel patch set to support.
> > http://comments.gmane.org/gmane.linux.network/333615
> >
> >
> > Usage scenario
> > =================
> >
> > It's well accepted by industry to use DPDK to process fast path packets in
> > user space in a high performance fashion, meanwhile processing slow path
> > control packets in kernel space is still needed as those packets usually
> > rely on in_kernel TCP/IP stacks and/or socket programming interface.
> >
> > KNI(Kernel NIC Interface) mechanism in DPDK is designed to meet this
> > requirement, with below limitation:
> >
> > 1) Software classifies packets and distributes them to kernel via DPDK
> > software rings, at the cost of significant CPU cycles and memory bandwidth.
> >
> > 2) Memory copy packets between kernel' socket buffer and mbuf brings
> > significant negative performance impact to KNI performance.
> >
> > The bifurcated driver provides a alternative approach that not only offloads
> > flow classification and distribution to NIC but also support packets zero_copy.
> >
> > User can use standard ethtool to add filter rules to the NIC in order to
> > distribute specific flows to the queues only accessed by kernel driver and
> > stack, and add other rules to distribute packets to the queues assigned to
> > user-space.
> >
> > For those rx/tx queue pairs that directly accessed from user space,
> > DPDK takes over the packets rx/tx as well as corresponding DMA operation
> > for high performance packet I/O.
> >
> >
> > What's the impact and change to DPDK
> > ======================================
> >
> > DPDK usually binds PCIe NIC devices by leveraging kernel' user space driver
> > mechanism UIO or VFIO to map entire NIC' PCIe I/O space of NIC to user space.
> > The bifurcated driver PMD talks to a NIC interface using raw socket APIs and
> > only mmap() limited I/O space (e.g. certain 4K pages) for accessing involved
> > rx/tx queue pairs. So the impact and changes mainly comes with below:
> >
> > - netdev
> > DPDK needs to create a af_packet socket and bind it to a bifurcated netdev.
> > The socket fd will be used to request 'queue pairs info',
> > 'split/return queue pairs' and etc. The PCIe device ID, netdev MAC address,
> > numa info are also from the netdev response.
> >
> > - PCIe device scan and driver probe
> > netdev provides the PCIe device ID information. Refer to the device ID,
> > the correct driver should be used. And for such netdev device, the creation
> > of PCIe device is no longer from scan but the on-demand assignment.
> >
> > - PCIe BAR mapping
> > "bifurcated driver" maps several pages for the queue pairs.
> > Others BAR register space maps to a fake page. The BAR mapping go through
> > mmap on sockfd. Which is a little different from what UIO/VFIO does.
> >
> > - PMD
> > The PMD will no longer really initialize and configure NIC.
> > Instead, it only takes care the queue pair setup, rx_burst and tx_burst.
> >
> > The patch uses eal '--vdev' parameter to assign netdev iface name and number of
> > queue pairs. Here's a example about how to configure the bifurcated driver and
> > run DPDK testpmd with bifurcated PMD.
> >
> > 1. Set promisc mode
> > > ifconfig eth0 promisc
> >
> > 2. Turn on fdir
> > > ethtool -K eth0 ntuple on
> >
> > 3. Setup a flow director rule to distribute packets with source ip
> > 0.0.0.0 to rxq No.0
> > > ethtool -N eth0 flow-type udp4 src-ip 0.0.0.0 action 0
> >
> > 4. Run testpmd on netdev 'eth0' with 1 queue pair.
> > > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 \
> > > --vdev=rte_bifurc,iface=eth0,qpairs=1 -- \
> > > -i --rxfreet=32 --txfreet=32 --txrst=32
> > Note:
> > iface and qpairs arguments above specify the netdev interface name and
> > number of qpairs that user space request from the "bifurcated driver"
> > respectively.
> >
> > 5. Setup a flow director rule to distribute packets with source ip
> > 1.1.1.1 to rxq No.32. This needs to be done after testpmd starts.
> > > ethtool -N eth0 flow-type udp4 src-ip 1.1.1.1 action 32
> >
> > Below illustrates the detailed changes in this patch set.
> >
> > eal
> > --------
> > The first two patches are all about the eal API declaration and Linux version
> > definition to support af_packet socket and verbs of bifurcated netdev.
> > Those APIs include the verbs like open, bind, (un)map, split/retturn, map_umem.
> > And other APIs like set_pci, get_ifinfo and get/put_devargs which help to
> > generate pci device from bifurcated netdev and get basic netdev info.
> >
> > The third patch is used to allow probing driver on the PCIe VDEV created from
> > a NIC interface driven by "bifurcated driver". It defines a new flag
> > 'RTE_PCI_DRV_BIFURC' used for direct ring access PMD.
> >
> > librte_bifurc
> > ---------------
> > The library is used as a VDEV bus driver to scan '--vdev=rte_bifurc' VDEV
> > from eal command-line. It generates the PCIe VDEV device ready for further
> > driver probe. It maintains the bifurcated device information include sockfd,
> > hwaddr, mtu, qpairs, iface_name. It's used for other direct ring access PMD
> > to apply for bifurcated device info.
> >
> > direct ring access PMD
> > -------------------------
> > The patch provides direct ring access PMD for ixgbe. Comparing to the normal
> > PMD ixgbe, it uses 'RTE_PCI_DRV_BIFURC' flag during self registration.
> > It mostly reuses the existing PMD ops to avoid re-implementing everything
> > from scratch. And it also modifies the rx/tx_queue_setup to allow queue
> > setup from any queue offset.
> >
> > Supported NIC driver
> > ========================
> >
> > The "bifurcated driver" kernel patch only supports "ixgbe" driver at the moment,
> > so this RFC patch also provides "ixgbe" PMD via direct-mapped rings as sample.
> > The support for 40GE(i40e) will be added in the future.
> >
> > In addition, for those multi-queues enabled NIC with flow director capability
> > to do perform packet classification and distribution, there's no special
> > technical gap to provide bifurcated driver approach support.
> >
> > Limitation
> > ============
> >
> > By using "bifurcated driver", user space only takes over the DMA operation.
> > For those NIC configure setting, it's out of control from user space PMD.
> > All the NIC setting including add/del filter rules need to be done by
> > standard Linux network tools(e.g. ethtool).
> > So the feature support really depend on how much are supported by ethtool.
> >
> >
> > Any questions, comments and feedback are welcome.
> >
> >
> > -END-
> >
> > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> >
> > *** BLURB HERE ***
> >
> > Cunming Liang (6):
> > eal: common direct ring access API
> > eal: direct ring access support by linux af_packet
> > pci: allow VDEV as pci device during device driver probe
> > bifurc: add driver to scan bifurcated netdev
> > ixgbe: rx/tx queue stop bug fix
> > ixgbe: PMD for bifurc ixgbe net device
> >
> > config/common_linuxapp | 5 +
> > lib/Makefile | 1 +
> > lib/librte_bifurc/Makefile | 58 +++++
> > lib/librte_bifurc/rte_bifurc.c | 284 +++++++++++++++++++++
> > lib/librte_bifurc/rte_bifurc.h | 90 +++++++
> > lib/librte_eal/common/Makefile | 5 +
> > lib/librte_eal/common/include/rte_pci.h | 4 +
> > lib/librte_eal/common/include/rte_pci_bifurc.h | 186 ++++++++++++++
> > lib/librte_eal/linuxapp/eal/Makefile | 1 +
> > lib/librte_eal/linuxapp/eal/eal_pci.c | 42 ++--
> > lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c | 336 +++++++++++++++++++++++++
> > lib/librte_ether/rte_ethdev.c | 3 +-
> > lib/librte_pmd_ixgbe/Makefile | 13 +-
> > lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303 ++++++++++++++++++++++
> > lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++
> > lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 44 +++-
> > lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 +
> > mk/rte.app.mk | 6 +
> > 18 files changed, 1421 insertions(+), 27 deletions(-)
> > create mode 100644 lib/librte_bifurc/Makefile
> > create mode 100644 lib/librte_bifurc/rte_bifurc.c
> > create mode 100644 lib/librte_bifurc/rte_bifurc.h
> > create mode 100644 lib/librte_eal/common/include/rte_pci_bifurc.h
> > create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
> > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> >
> > --
> > 1.8.1.4
> >
> >
> AIUI, the bifurcated driver hasn't yet been accepted upstream, has it? Given
> that, I don't think its wise to pull this in yet ahead of the kernel work, as
> there may still be kernel side changes that the user space pmd will have to
> adapt to.
> Neil
>
Hence the RFC nature of the patch, I believe. :-) Before the kernel part hits the
main kernel tree we can at least discuss the overall direction to be taken for
this driver because it's significantly different than any other HW driver.
/Bruce
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
2014-11-25 14:29 ` Bruce Richardson
@ 2014-11-25 14:40 ` Liang, Cunming
2014-11-25 14:46 ` Zhou, Danny
2014-11-25 14:57 ` Walukiewicz, Miroslaw
1 sibling, 1 reply; 24+ messages in thread
From: Liang, Cunming @ 2014-11-25 14:40 UTC (permalink / raw)
To: Richardson, Bruce, Neil Horman; +Cc: dev
> -----Original Message-----
> From: Richardson, Bruce
> Sent: Tuesday, November 25, 2014 10:30 PM
> To: Neil Horman
> Cc: Liang, Cunming; dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
>
> On Tue, Nov 25, 2014 at 09:23:16AM -0500, Neil Horman wrote:
> > On Tue, Nov 25, 2014 at 10:11:16PM +0800, Cunming Liang wrote:
> > >
> > > This is a RFC patch set to support "bifurcated driver" in DPDK.
> > >
> > >
> > > What is "bifurcated driver"?
> > > ===========================
> > >
> > > The "bifurcated driver" stands for the kernel NIC driver that supports:
> > >
> > > 1. on-demand rx/tx queue pairs split-off and assignment to user space
> > >
> > > 2. direct NIC resource(e.g. rx/tx queue registers) access from user space
> > >
> > > 3. distributing packets to kernel or user space rx queues by
> > > NIC's flow director according to the filter rules
> > >
> > > Here's the kernel patch set to support.
> > > http://comments.gmane.org/gmane.linux.network/333615
> > >
> > >
> > > Usage scenario
> > > =================
> > >
> > > It's well accepted by industry to use DPDK to process fast path packets in
> > > user space in a high performance fashion, meanwhile processing slow path
> > > control packets in kernel space is still needed as those packets usually
> > > rely on in_kernel TCP/IP stacks and/or socket programming interface.
> > >
> > > KNI(Kernel NIC Interface) mechanism in DPDK is designed to meet this
> > > requirement, with below limitation:
> > >
> > > 1) Software classifies packets and distributes them to kernel via DPDK
> > > software rings, at the cost of significant CPU cycles and memory
> bandwidth.
> > >
> > > 2) Memory copy packets between kernel' socket buffer and mbuf brings
> > > significant negative performance impact to KNI performance.
> > >
> > > The bifurcated driver provides a alternative approach that not only offloads
> > > flow classification and distribution to NIC but also support packets zero_copy.
> > >
> > > User can use standard ethtool to add filter rules to the NIC in order to
> > > distribute specific flows to the queues only accessed by kernel driver and
> > > stack, and add other rules to distribute packets to the queues assigned to
> > > user-space.
> > >
> > > For those rx/tx queue pairs that directly accessed from user space,
> > > DPDK takes over the packets rx/tx as well as corresponding DMA operation
> > > for high performance packet I/O.
> > >
> > >
> > > What's the impact and change to DPDK
> > > ======================================
> > >
> > > DPDK usually binds PCIe NIC devices by leveraging kernel' user space driver
> > > mechanism UIO or VFIO to map entire NIC' PCIe I/O space of NIC to user
> space.
> > > The bifurcated driver PMD talks to a NIC interface using raw socket APIs and
> > > only mmap() limited I/O space (e.g. certain 4K pages) for accessing involved
> > > rx/tx queue pairs. So the impact and changes mainly comes with below:
> > >
> > > - netdev
> > > DPDK needs to create a af_packet socket and bind it to a bifurcated
> netdev.
> > > The socket fd will be used to request 'queue pairs info',
> > > 'split/return queue pairs' and etc. The PCIe device ID, netdev MAC
> address,
> > > numa info are also from the netdev response.
> > >
> > > - PCIe device scan and driver probe
> > > netdev provides the PCIe device ID information. Refer to the device ID,
> > > the correct driver should be used. And for such netdev device, the
> creation
> > > of PCIe device is no longer from scan but the on-demand assignment.
> > >
> > > - PCIe BAR mapping
> > > "bifurcated driver" maps several pages for the queue pairs.
> > > Others BAR register space maps to a fake page. The BAR mapping go
> through
> > > mmap on sockfd. Which is a little different from what UIO/VFIO does.
> > >
> > > - PMD
> > > The PMD will no longer really initialize and configure NIC.
> > > Instead, it only takes care the queue pair setup, rx_burst and tx_burst.
> > >
> > > The patch uses eal '--vdev' parameter to assign netdev iface name and
> number of
> > > queue pairs. Here's a example about how to configure the bifurcated driver
> and
> > > run DPDK testpmd with bifurcated PMD.
> > >
> > > 1. Set promisc mode
> > > > ifconfig eth0 promisc
> > >
> > > 2. Turn on fdir
> > > > ethtool -K eth0 ntuple on
> > >
> > > 3. Setup a flow director rule to distribute packets with source ip
> > > 0.0.0.0 to rxq No.0
> > > > ethtool -N eth0 flow-type udp4 src-ip 0.0.0.0 action 0
> > >
> > > 4. Run testpmd on netdev 'eth0' with 1 queue pair.
> > > > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 \
> > > > --vdev=rte_bifurc,iface=eth0,qpairs=1 -- \
> > > > -i --rxfreet=32 --txfreet=32 --txrst=32
> > > Note:
> > > iface and qpairs arguments above specify the netdev interface name and
> > > number of qpairs that user space request from the "bifurcated driver"
> > > respectively.
> > >
> > > 5. Setup a flow director rule to distribute packets with source ip
> > > 1.1.1.1 to rxq No.32. This needs to be done after testpmd starts.
> > > > ethtool -N eth0 flow-type udp4 src-ip 1.1.1.1 action 32
> > >
> > > Below illustrates the detailed changes in this patch set.
> > >
> > > eal
> > > --------
> > > The first two patches are all about the eal API declaration and Linux version
> > > definition to support af_packet socket and verbs of bifurcated netdev.
> > > Those APIs include the verbs like open, bind, (un)map, split/retturn,
> map_umem.
> > > And other APIs like set_pci, get_ifinfo and get/put_devargs which help to
> > > generate pci device from bifurcated netdev and get basic netdev info.
> > >
> > > The third patch is used to allow probing driver on the PCIe VDEV created from
> > > a NIC interface driven by "bifurcated driver". It defines a new flag
> > > 'RTE_PCI_DRV_BIFURC' used for direct ring access PMD.
> > >
> > > librte_bifurc
> > > ---------------
> > > The library is used as a VDEV bus driver to scan '--vdev=rte_bifurc' VDEV
> > > from eal command-line. It generates the PCIe VDEV device ready for further
> > > driver probe. It maintains the bifurcated device information include sockfd,
> > > hwaddr, mtu, qpairs, iface_name. It's used for other direct ring access PMD
> > > to apply for bifurcated device info.
> > >
> > > direct ring access PMD
> > > -------------------------
> > > The patch provides direct ring access PMD for ixgbe. Comparing to the normal
> > > PMD ixgbe, it uses 'RTE_PCI_DRV_BIFURC' flag during self registration.
> > > It mostly reuses the existing PMD ops to avoid re-implementing everything
> > > from scratch. And it also modifies the rx/tx_queue_setup to allow queue
> > > setup from any queue offset.
> > >
> > > Supported NIC driver
> > > ========================
> > >
> > > The "bifurcated driver" kernel patch only supports "ixgbe" driver at the
> moment,
> > > so this RFC patch also provides "ixgbe" PMD via direct-mapped rings as
> sample.
> > > The support for 40GE(i40e) will be added in the future.
> > >
> > > In addition, for those multi-queues enabled NIC with flow director capability
> > > to do perform packet classification and distribution, there's no special
> > > technical gap to provide bifurcated driver approach support.
> > >
> > > Limitation
> > > ============
> > >
> > > By using "bifurcated driver", user space only takes over the DMA operation.
> > > For those NIC configure setting, it's out of control from user space PMD.
> > > All the NIC setting including add/del filter rules need to be done by
> > > standard Linux network tools(e.g. ethtool).
> > > So the feature support really depend on how much are supported by ethtool.
> > >
> > >
> > > Any questions, comments and feedback are welcome.
> > >
> > >
> > > -END-
> > >
> > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > >
> > > *** BLURB HERE ***
> > >
> > > Cunming Liang (6):
> > > eal: common direct ring access API
> > > eal: direct ring access support by linux af_packet
> > > pci: allow VDEV as pci device during device driver probe
> > > bifurc: add driver to scan bifurcated netdev
> > > ixgbe: rx/tx queue stop bug fix
> > > ixgbe: PMD for bifurc ixgbe net device
> > >
> > > config/common_linuxapp | 5 +
> > > lib/Makefile | 1 +
> > > lib/librte_bifurc/Makefile | 58 +++++
> > > lib/librte_bifurc/rte_bifurc.c | 284
> +++++++++++++++++++++
> > > lib/librte_bifurc/rte_bifurc.h | 90 +++++++
> > > lib/librte_eal/common/Makefile | 5 +
> > > lib/librte_eal/common/include/rte_pci.h | 4 +
> > > lib/librte_eal/common/include/rte_pci_bifurc.h | 186 ++++++++++++++
> > > lib/librte_eal/linuxapp/eal/Makefile | 1 +
> > > lib/librte_eal/linuxapp/eal/eal_pci.c | 42 ++--
> > > lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c | 336
> +++++++++++++++++++++++++
> > > lib/librte_ether/rte_ethdev.c | 3 +-
> > > lib/librte_pmd_ixgbe/Makefile | 13 +-
> > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303
> ++++++++++++++++++++++
> > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++
> > > lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 44 +++-
> > > lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 +
> > > mk/rte.app.mk | 6 +
> > > 18 files changed, 1421 insertions(+), 27 deletions(-)
> > > create mode 100644 lib/librte_bifurc/Makefile
> > > create mode 100644 lib/librte_bifurc/rte_bifurc.c
> > > create mode 100644 lib/librte_bifurc/rte_bifurc.h
> > > create mode 100644 lib/librte_eal/common/include/rte_pci_bifurc.h
> > > create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
> > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > >
> > > --
> > > 1.8.1.4
> > >
> > >
> > AIUI, the bifurcated driver hasn't yet been accepted upstream, has it? Given
> > that, I don't think its wise to pull this in yet ahead of the kernel work, as
> > there may still be kernel side changes that the user space pmd will have to
> > adapt to.
> > Neil
> >
> Hence the RFC nature of the patch, I believe. :-) Before the kernel part hits the
> main kernel tree we can at least discuss the overall direction to be taken for
> this driver because it's significantly different that any other HW driver.
[Liang, Cunming] Yes, as Bruce said, that's the major purpose.
Another is that, with this patch, people can run it together with the kernel patch.
It helps them understand the benefit and raise comments based on user experience.
>
> /Bruce
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
2014-11-25 14:40 ` Liang, Cunming
@ 2014-11-25 14:46 ` Zhou, Danny
0 siblings, 0 replies; 24+ messages in thread
From: Zhou, Danny @ 2014-11-25 14:46 UTC (permalink / raw)
To: Liang, Cunming, Richardson, Bruce, Neil Horman; +Cc: dev
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Liang, Cunming
> Sent: Tuesday, November 25, 2014 10:40 PM
> To: Richardson, Bruce; Neil Horman
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
>
>
>
> > -----Original Message-----
> > From: Richardson, Bruce
> > Sent: Tuesday, November 25, 2014 10:30 PM
> > To: Neil Horman
> > Cc: Liang, Cunming; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
> >
> > On Tue, Nov 25, 2014 at 09:23:16AM -0500, Neil Horman wrote:
> > > On Tue, Nov 25, 2014 at 10:11:16PM +0800, Cunming Liang wrote:
> > > >
> > > > This is a RFC patch set to support "bifurcated driver" in DPDK.
> > > >
> > > >
> > > > What is "bifurcated driver"?
> > > > ===========================
> > > >
> > > > The "bifurcated driver" stands for the kernel NIC driver that supports:
> > > >
> > > > 1. on-demand rx/tx queue pairs split-off and assignment to user space
> > > >
> > > > 2. direct NIC resource(e.g. rx/tx queue registers) access from user space
> > > >
> > > > 3. distributing packets to kernel or user space rx queues by
> > > > NIC's flow director according to the filter rules
> > > >
> > > > Here's the kernel patch set to support.
> > > > http://comments.gmane.org/gmane.linux.network/333615
> > > >
> > > >
> > > > Usage scenario
> > > > =================
> > > >
> > > > It's well accepted by industry to use DPDK to process fast path packets in
> > > > user space in a high performance fashion, meanwhile processing slow path
> > > > control packets in kernel space is still needed as those packets usually
> > > > rely on in_kernel TCP/IP stacks and/or socket programming interface.
> > > >
> > > > KNI(Kernel NIC Interface) mechanism in DPDK is designed to meet this
> > > > requirement, with below limitation:
> > > >
> > > > 1) Software classifies packets and distributes them to kernel via DPDK
> > > > software rings, at the cost of significant CPU cycles and memory
> > bandwidth.
> > > >
> > > > 2) Memory copy packets between kernel' socket buffer and mbuf brings
> > > > significant negative performance impact to KNI performance.
> > > >
> > > > The bifurcated driver provides a alternative approach that not only offloads
> > > > flow classification and distribution to NIC but also support packets zero_copy.
> > > >
> > > > User can use standard ethtool to add filter rules to the NIC in order to
> > > > distribute specific flows to the queues only accessed by kernel driver and
> > > > stack, and add other rules to distribute packets to the queues assigned to
> > > > user-space.
> > > >
> > > > For those rx/tx queue pairs that directly accessed from user space,
> > > > DPDK takes over the packets rx/tx as well as corresponding DMA operation
> > > > for high performance packet I/O.
> > > >
> > > >
> > > > What's the impact and change to DPDK
> > > > ======================================
> > > >
> > > > DPDK usually binds PCIe NIC devices by leveraging kernel' user space driver
> > > > mechanism UIO or VFIO to map entire NIC' PCIe I/O space of NIC to user
> > space.
> > > > The bifurcated driver PMD talks to a NIC interface using raw socket APIs and
> > > > only mmap() limited I/O space (e.g. certain 4K pages) for accessing involved
> > > > rx/tx queue pairs. So the impact and changes mainly comes with below:
> > > >
> > > > - netdev
> > > > DPDK needs to create a af_packet socket and bind it to a bifurcated
> > netdev.
> > > > The socket fd will be used to request 'queue pairs info',
> > > > 'split/return queue pairs' and etc. The PCIe device ID, netdev MAC
> > address,
> > > > numa info are also from the netdev response.
> > > >
> > > > - PCIe device scan and driver probe
> > > > netdev provides the PCIe device ID information. Refer to the device ID,
> > > > the correct driver should be used. And for such netdev device, the
> > creation
> > > > of PCIe device is no longer from scan but the on-demand assignment.
> > > >
> > > > - PCIe BAR mapping
> > > > "bifurcated driver" maps several pages for the queue pairs.
> > > > Others BAR register space maps to a fake page. The BAR mapping go
> > through
> > > > mmap on sockfd. Which is a little different from what UIO/VFIO does.
> > > >
> > > > - PMD
> > > > The PMD will no longer really initialize and configure NIC.
> > > > Instead, it only takes care the queue pair setup, rx_burst and tx_burst.
> > > >
> > > > The patch uses eal '--vdev' parameter to assign netdev iface name and
> > number of
> > > > queue pairs. Here's a example about how to configure the bifurcated driver
> > and
> > > > run DPDK testpmd with bifurcated PMD.
> > > >
> > > > 1. Set promisc mode
> > > > > ifconfig eth0 promisc
> > > >
> > > > 2. Turn on fdir
> > > > > ethtool -K eth0 ntuple on
> > > >
> > > > 3. Setup a flow director rule to distribute packets with source ip
> > > > 0.0.0.0 to rxq No.0
> > > > > ethtool -N eth0 flow-type udp4 src-ip 0.0.0.0 action 0
> > > >
> > > > 4. Run testpmd on netdev 'eth0' with 1 queue pair.
> > > > > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 \
> > > > > --vdev=rte_bifurc,iface=eth0,qpairs=1 -- \
> > > > > -i --rxfreet=32 --txfreet=32 --txrst=32
> > > > Note:
> > > > iface and qpairs arguments above specify the netdev interface name and
> > > > number of qpairs that user space request from the "bifurcated driver"
> > > > respectively.
> > > >
> > > > 5. Setup a flow director rule to distribute packets with source ip
> > > > 1.1.1.1 to rxq No.32. This needs to be done after testpmd starts.
> > > > > ethtool -N eth0 flow-type udp4 src-ip 1.1.1.1 action 32
> > > >
> > > > Below illustrates the detailed changes in this patch set.
> > > >
> > > > eal
> > > > --------
> > > > The first two patches are all about the eal API declaration and Linux version
> > > > definition to support af_packet socket and verbs of bifurcated netdev.
> > > > Those APIs include the verbs like open, bind, (un)map, split/retturn,
> > map_umem.
> > > > And other APIs like set_pci, get_ifinfo and get/put_devargs which help to
> > > > generate pci device from bifurcated netdev and get basic netdev info.
> > > >
> > > > The third patch is used to allow probing driver on the PCIe VDEV created from
> > > > a NIC interface driven by "bifurcated driver". It defines a new flag
> > > > 'RTE_PCI_DRV_BIFURC' used for direct ring access PMD.
> > > >
> > > > librte_bifurc
> > > > ---------------
> > > > The library is used as a VDEV bus driver to scan '--vdev=rte_bifurc' VDEV
> > > > from eal command-line. It generates the PCIe VDEV device ready for further
> > > > driver probe. It maintains the bifurcated device information include sockfd,
> > > > hwaddr, mtu, qpairs, iface_name. It's used for other direct ring access PMD
> > > > to apply for bifurcated device info.
> > > >
> > > > direct ring access PMD
> > > > -------------------------
> > > > The patch provides direct ring access PMD for ixgbe. Comparing to the normal
> > > > PMD ixgbe, it uses 'RTE_PCI_DRV_BIFURC' flag during self registration.
> > > > It mostly reuses the existing PMD ops to avoid re-implementing everything
> > > > from scratch. And it also modifies the rx/tx_queue_setup to allow queue
> > > > setup from any queue offset.
> > > >
> > > > Supported NIC driver
> > > > ========================
> > > >
> > > > The "bifurcated driver" kernel patch only supports "ixgbe" driver at the
> > moment,
> > > > so this RFC patch also provides "ixgbe" PMD via direct-mapped rings as
> > sample.
> > > > The support for 40GE(i40e) will be added in the future.
> > > >
> > > > In addition, for those multi-queues enabled NIC with flow director capability
> > > > to do perform packet classification and distribution, there's no special
> > > > technical gap to provide bifurcated driver approach support.
> > > >
> > > > Limitation
> > > > ============
> > > >
> > > > By using "bifurcated driver", user space only takes over the DMA operation.
> > > > For those NIC configure setting, it's out of control from user space PMD.
> > > > All the NIC setting including add/del filter rules need to be done by
> > > > standard Linux network tools(e.g. ethtool).
> > > > So the feature support really depend on how much are supported by ethtool.
> > > >
> > > >
> > > > Any questions, comments and feedback are welcome.
> > > >
> > > >
> > > > -END-
> > > >
> > > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > > >
> > > > *** BLURB HERE ***
> > > >
> > > > Cunming Liang (6):
> > > > eal: common direct ring access API
> > > > eal: direct ring access support by linux af_packet
> > > > pci: allow VDEV as pci device during device driver probe
> > > > bifurc: add driver to scan bifurcated netdev
> > > > ixgbe: rx/tx queue stop bug fix
> > > > ixgbe: PMD for bifurc ixgbe net device
> > > >
> > > > config/common_linuxapp | 5 +
> > > > lib/Makefile | 1 +
> > > > lib/librte_bifurc/Makefile | 58 +++++
> > > > lib/librte_bifurc/rte_bifurc.c | 284
> > +++++++++++++++++++++
> > > > lib/librte_bifurc/rte_bifurc.h | 90 +++++++
> > > > lib/librte_eal/common/Makefile | 5 +
> > > > lib/librte_eal/common/include/rte_pci.h | 4 +
> > > > lib/librte_eal/common/include/rte_pci_bifurc.h | 186 ++++++++++++++
> > > > lib/librte_eal/linuxapp/eal/Makefile | 1 +
> > > > lib/librte_eal/linuxapp/eal/eal_pci.c | 42 ++--
> > > > lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c | 336
> > +++++++++++++++++++++++++
> > > > lib/librte_ether/rte_ethdev.c | 3 +-
> > > > lib/librte_pmd_ixgbe/Makefile | 13 +-
> > > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303
> > ++++++++++++++++++++++
> > > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++
> > > > lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 44 +++-
> > > > lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 +
> > > > mk/rte.app.mk | 6 +
> > > > 18 files changed, 1421 insertions(+), 27 deletions(-)
> > > > create mode 100644 lib/librte_bifurc/Makefile
> > > > create mode 100644 lib/librte_bifurc/rte_bifurc.c
> > > > create mode 100644 lib/librte_bifurc/rte_bifurc.h
> > > > create mode 100644 lib/librte_eal/common/include/rte_pci_bifurc.h
> > > > create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
> > > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > > >
> > > > --
> > > > 1.8.1.4
> > > >
> > > >
> > > AIUI, the bifurcated driver hasn't yet been accepted upstream, has it? Given
> > > that, I don't think its wise to pull this in yet ahead of the kernel work, as
> > > there may still be kernel side changes that the user space pmd will have to
> > > adapt to.
> > > Neil
> > >
> > Hence the RFC nature of the patch, I believe. :-) Before the kernel part hits the
> > main kernel tree we can at least discuss the overall direction to be taken for
> > this driver because it's significantly different that any other HW driver.
> [Liang, Cunming] Yes, as Bruce said, that's the major purpose.
> Another one is having this patch, people can run it together with kernel patch.
> It helps to understand the benefit and raise comments per user experience.
> >
> > /Bruce
I echo Bruce. Also, the V2 DPDK RFC patchset will be submitted to dpdk.org to support
the V2 netdev kernel patchset with memory protection accordingly. Then people can play
with the bifurcated driver and get a global view of how it works and what kind of
performance can be achieved, instead of continuing to ask basic questions.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
2014-11-25 14:29 ` Bruce Richardson
2014-11-25 14:40 ` Liang, Cunming
@ 2014-11-25 14:57 ` Walukiewicz, Miroslaw
2014-11-25 15:02 ` Bruce Richardson
1 sibling, 1 reply; 24+ messages in thread
From: Walukiewicz, Miroslaw @ 2014-11-25 14:57 UTC (permalink / raw)
To: Richardson, Bruce, Neil Horman; +Cc: dev
Thank you, Bruce, for the explanation of the idea.
I have a question regarding TCP SYN packets: do you have any idea how to share the TCP SYN requests between the kernel and a user-space application?
Regards,
Mirek
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Bruce Richardson
> Sent: Tuesday, November 25, 2014 3:30 PM
> To: Neil Horman
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
>
> On Tue, Nov 25, 2014 at 09:23:16AM -0500, Neil Horman wrote:
> > On Tue, Nov 25, 2014 at 10:11:16PM +0800, Cunming Liang wrote:
> > >
> > > This is a RFC patch set to support "bifurcated driver" in DPDK.
> > >
> > >
> > > What is "bifurcated driver"?
> > > ===========================
> > >
> > > The "bifurcated driver" stands for the kernel NIC driver that supports:
> > >
> > > 1. on-demand rx/tx queue pairs split-off and assignment to user space
> > >
> > > 2. direct NIC resource(e.g. rx/tx queue registers) access from user space
> > >
> > > 3. distributing packets to kernel or user space rx queues by
> > > NIC's flow director according to the filter rules
> > >
> > > Here's the kernel patch set to support.
> > > http://comments.gmane.org/gmane.linux.network/333615
> > >
> > >
> > > Usage scenario
> > > =================
> > >
> > > It's well accepted by industry to use DPDK to process fast path packets in
> > > user space in a high performance fashion, meanwhile processing slow
> path
> > > control packets in kernel space is still needed as those packets usually
> > > rely on in_kernel TCP/IP stacks and/or socket programming interface.
> > >
> > > KNI(Kernel NIC Interface) mechanism in DPDK is designed to meet this
> > > requirement, with below limitation:
> > >
> > > 1) Software classifies packets and distributes them to kernel via DPDK
> > > software rings, at the cost of significant CPU cycles and memory
> bandwidth.
> > >
> > > 2) Memory copy packets between kernel' socket buffer and mbuf brings
> > > significant negative performance impact to KNI performance.
> > >
> > > The bifurcated driver provides a alternative approach that not only
> offloads
> > > flow classification and distribution to NIC but also support packets
> zero_copy.
> > >
> > > User can use standard ethtool to add filter rules to the NIC in order to
> > > distribute specific flows to the queues only accessed by kernel driver and
> > > stack, and add other rules to distribute packets to the queues assigned to
> > > user-space.
> > >
> > > For those rx/tx queue pairs that directly accessed from user space,
> > > DPDK takes over the packets rx/tx as well as corresponding DMA
> operation
> > > for high performance packet I/O.
> > >
> > >
> > > What's the impact and change to DPDK
> > > ======================================
> > >
> > > DPDK usually binds PCIe NIC devices by leveraging kernel' user space
> driver
> > > mechanism UIO or VFIO to map entire NIC' PCIe I/O space of NIC to user
> space.
> > > The bifurcated driver PMD talks to a NIC interface using raw socket APIs
> and
> > > only mmap() limited I/O space (e.g. certain 4K pages) for accessing
> involved
> > > rx/tx queue pairs. So the impact and changes mainly comes with below:
> > >
> > > - netdev
> > > DPDK needs to create a af_packet socket and bind it to a bifurcated
> netdev.
> > > The socket fd will be used to request 'queue pairs info',
> > > 'split/return queue pairs' and etc. The PCIe device ID, netdev MAC
> address,
> > > numa info are also from the netdev response.
> > >
> > > - PCIe device scan and driver probe
> > > netdev provides the PCIe device ID information. Refer to the device ID,
> > > the correct driver should be used. And for such netdev device, the
> creation
> > > of PCIe device is no longer from scan but the on-demand assignment.
> > >
> > > - PCIe BAR mapping
> > > "bifurcated driver" maps several pages for the queue pairs.
> > > Others BAR register space maps to a fake page. The BAR mapping go
> through
> > > mmap on sockfd. Which is a little different from what UIO/VFIO does.
> > >
> > > - PMD
> > > The PMD will no longer really initialize and configure NIC.
> > > Instead, it only takes care the queue pair setup, rx_burst and tx_burst.
> > >
> > > The patch uses eal '--vdev' parameter to assign netdev iface name and
> number of
> > > queue pairs. Here's a example about how to configure the bifurcated
> driver and
> > > run DPDK testpmd with bifurcated PMD.
> > >
> > > 1. Set promisc mode
> > > > ifconfig eth0 promisc
> > >
> > > 2. Turn on fdir
> > > > ethtool -K eth0 ntuple on
> > >
> > > 3. Setup a flow director rule to distribute packets with source ip
> > > 0.0.0.0 to rxq No.0
> > > > ethtool -N eth0 flow-type udp4 src-ip 0.0.0.0 action 0
> > >
> > > 4. Run testpmd on netdev 'eth0' with 1 queue pair.
> > > > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 \
> > > > --vdev=rte_bifurc,iface=eth0,qpairs=1 -- \
> > > > -i --rxfreet=32 --txfreet=32 --txrst=32
> > > Note:
> > > iface and qpairs arguments above specify the netdev interface name
> and
> > > number of qpairs that user space request from the "bifurcated driver"
> > > respectively.
> > >
> > > 5. Setup a flow director rule to distribute packets with source ip
> > > 1.1.1.1 to rxq No.32. This needs to be done after testpmd starts.
> > > > ethtool -N eth0 flow-type udp4 src-ip 1.1.1.1 action 32
> > >
> > > Below illustrates the detailed changes in this patch set.
> > >
> > > eal
> > > --------
> > > The first two patches are all about the eal API declaration and Linux
> version
> > > definition to support af_packet socket and verbs of bifurcated netdev.
> > > Those APIs include the verbs like open, bind, (un)map, split/retturn,
> map_umem.
> > > And other APIs like set_pci, get_ifinfo and get/put_devargs which help to
> > > generate pci device from bifurcated netdev and get basic netdev info.
> > >
> > > The third patch is used to allow probing driver on the PCIe VDEV created
> from
> > > a NIC interface driven by "bifurcated driver". It defines a new flag
> > > 'RTE_PCI_DRV_BIFURC' used for direct ring access PMD.
> > >
> > > librte_bifurc
> > > ---------------
> > > The library is used as a VDEV bus driver to scan '--vdev=rte_bifurc' VDEV
> > > from eal command-line. It generates the PCIe VDEV device ready for
> further
> > > driver probe. It maintains the bifurcated device information include
> sockfd,
> > > hwaddr, mtu, qpairs, iface_name. It's used for other direct ring access
> PMD
> > > to apply for bifurcated device info.
> > >
> > > direct ring access PMD
> > > -------------------------
> > > The patch provides direct ring access PMD for ixgbe. Comparing to the
> normal
> > > PMD ixgbe, it uses 'RTE_PCI_DRV_BIFURC' flag during self registration.
> > > It mostly reuses the existing PMD ops to avoid re-implementing
> everything
> > > from scratch. And it also modifies the rx/tx_queue_setup to allow queue
> > > setup from any queue offset.
> > >
> > > Supported NIC driver
> > > ========================
> > >
> > > The "bifurcated driver" kernel patch only supports "ixgbe" driver at the
> moment,
> > > so this RFC patch also provides "ixgbe" PMD via direct-mapped rings as
> sample.
> > > The support for 40GE(i40e) will be added in the future.
> > >
> > > In addition, for those multi-queues enabled NIC with flow director
> capability
> > > to do perform packet classification and distribution, there's no special
> > > technical gap to provide bifurcated driver approach support.
> > >
> > > Limitation
> > > ============
> > >
> > > By using "bifurcated driver", user space only takes over the DMA
> operation.
> > > For those NIC configure setting, it's out of control from user space PMD.
> > > All the NIC setting including add/del filter rules need to be done by
> > > standard Linux network tools(e.g. ethtool).
> > > So the feature support really depend on how much are supported by
> ethtool.
> > >
> > >
> > > Any questions, comments and feedback are welcome.
> > >
> > >
> > > -END-
> > >
> > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > >
> > > *** BLURB HERE ***
> > >
> > > Cunming Liang (6):
> > > eal: common direct ring access API
> > > eal: direct ring access support by linux af_packet
> > > pci: allow VDEV as pci device during device driver probe
> > > bifurc: add driver to scan bifurcated netdev
> > > ixgbe: rx/tx queue stop bug fix
> > > ixgbe: PMD for bifurc ixgbe net device
> > >
> > > config/common_linuxapp | 5 +
> > > lib/Makefile | 1 +
> > > lib/librte_bifurc/Makefile | 58 +++++
> > > lib/librte_bifurc/rte_bifurc.c | 284 +++++++++++++++++++++
> > > lib/librte_bifurc/rte_bifurc.h | 90 +++++++
> > > lib/librte_eal/common/Makefile | 5 +
> > > lib/librte_eal/common/include/rte_pci.h | 4 +
> > > lib/librte_eal/common/include/rte_pci_bifurc.h | 186 ++++++++++++++
> > > lib/librte_eal/linuxapp/eal/Makefile | 1 +
> > > lib/librte_eal/linuxapp/eal/eal_pci.c | 42 ++--
> > > lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c | 336
> +++++++++++++++++++++++++
> > > lib/librte_ether/rte_ethdev.c | 3 +-
> > > lib/librte_pmd_ixgbe/Makefile | 13 +-
> > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303
> ++++++++++++++++++++++
> > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++
> > > lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 44 +++-
> > > lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 +
> > > mk/rte.app.mk | 6 +
> > > 18 files changed, 1421 insertions(+), 27 deletions(-)
> > > create mode 100644 lib/librte_bifurc/Makefile
> > > create mode 100644 lib/librte_bifurc/rte_bifurc.c
> > > create mode 100644 lib/librte_bifurc/rte_bifurc.h
> > > create mode 100644 lib/librte_eal/common/include/rte_pci_bifurc.h
> > > create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
> > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > >
> > > --
> > > 1.8.1.4
> > >
> > >
> > AIUI, the bifurcated driver hasn't yet been accepted upstream, has it?
> Given
> > that, I don't think its wise to pull this in yet ahead of the kernel work, as
> > there may still be kernel side changes that the user space pmd will have to
> > adapt to.
> > Neil
> >
> Hence the RFC nature of the patch, I believe. :-) Before the kernel part hits
> the
> main kernel tree we can at least discuss the overall direction to be taken for
> this driver because it's significantly different that any other HW driver.
>
> /Bruce
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
2014-11-25 14:57 ` Walukiewicz, Miroslaw
@ 2014-11-25 15:02 ` Bruce Richardson
2014-11-25 15:23 ` Zhou, Danny
0 siblings, 1 reply; 24+ messages in thread
From: Bruce Richardson @ 2014-11-25 15:02 UTC (permalink / raw)
To: Walukiewicz, Miroslaw; +Cc: dev
On Tue, Nov 25, 2014 at 02:57:13PM +0000, Walukiewicz, Miroslaw wrote:
> Thank you Bruce for explanation of the idea.
Actually, credit goes to Steve Liang, not me, for the explanation. :-)
>
> I have question regarding TCP SYN packets? Do you have any idea how to share the TCP SYN requests between kernel and user-space application?
As I'm giving the credit to Steve, I'll also pass the buck for answering that
question to him too! :-)
/Bruce
>
> Regards,
>
> Mirek
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Bruce Richardson
> > Sent: Tuesday, November 25, 2014 3:30 PM
> > To: Neil Horman
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
> >
> > On Tue, Nov 25, 2014 at 09:23:16AM -0500, Neil Horman wrote:
> > > On Tue, Nov 25, 2014 at 10:11:16PM +0800, Cunming Liang wrote:
> > > >
> > > > This is a RFC patch set to support "bifurcated driver" in DPDK.
> > > >
> > > >
> > > > What is "bifurcated driver"?
> > > > ===========================
> > > >
> > > > The "bifurcated driver" stands for the kernel NIC driver that supports:
> > > >
> > > > 1. on-demand rx/tx queue pairs split-off and assignment to user space
> > > >
> > > > 2. direct NIC resource(e.g. rx/tx queue registers) access from user space
> > > >
> > > > 3. distributing packets to kernel or user space rx queues by
> > > > NIC's flow director according to the filter rules
> > > >
> > > > Here's the kernel patch set to support.
> > > > http://comments.gmane.org/gmane.linux.network/333615
> > > >
> > > >
> > > > Usage scenario
> > > > =================
> > > >
> > > > It's well accepted by industry to use DPDK to process fast path packets in
> > > > user space in a high performance fashion, meanwhile processing slow
> > path
> > > > control packets in kernel space is still needed as those packets usually
> > > > rely on in_kernel TCP/IP stacks and/or socket programming interface.
> > > >
> > > > KNI(Kernel NIC Interface) mechanism in DPDK is designed to meet this
> > > > requirement, with below limitation:
> > > >
> > > > 1) Software classifies packets and distributes them to kernel via DPDK
> > > > software rings, at the cost of significant CPU cycles and memory
> > bandwidth.
> > > >
> > > > 2) Memory copy packets between kernel' socket buffer and mbuf brings
> > > > significant negative performance impact to KNI performance.
> > > >
> > > > The bifurcated driver provides a alternative approach that not only
> > offloads
> > > > flow classification and distribution to NIC but also support packets
> > zero_copy.
> > > >
> > > > User can use standard ethtool to add filter rules to the NIC in order to
> > > > distribute specific flows to the queues only accessed by kernel driver and
> > > > stack, and add other rules to distribute packets to the queues assigned to
> > > > user-space.
> > > >
> > > > For those rx/tx queue pairs that directly accessed from user space,
> > > > DPDK takes over the packets rx/tx as well as corresponding DMA
> > operation
> > > > for high performance packet I/O.
> > > >
> > > >
> > > > What's the impact and change to DPDK
> > > > ======================================
> > > >
> > > > DPDK usually binds PCIe NIC devices by leveraging kernel' user space
> > driver
> > > > mechanism UIO or VFIO to map entire NIC' PCIe I/O space of NIC to user
> > space.
> > > > The bifurcated driver PMD talks to a NIC interface using raw socket APIs
> > and
> > > > only mmap() limited I/O space (e.g. certain 4K pages) for accessing
> > involved
> > > > rx/tx queue pairs. So the impact and changes mainly comes with below:
> > > >
> > > > - netdev
> > > > DPDK needs to create a af_packet socket and bind it to a bifurcated
> > netdev.
> > > > The socket fd will be used to request 'queue pairs info',
> > > > 'split/return queue pairs' and etc. The PCIe device ID, netdev MAC
> > address,
> > > > numa info are also from the netdev response.
> > > >
> > > > - PCIe device scan and driver probe
> > > > netdev provides the PCIe device ID information. Refer to the device ID,
> > > > the correct driver should be used. And for such netdev device, the
> > creation
> > > > of PCIe device is no longer from scan but the on-demand assignment.
> > > >
> > > > - PCIe BAR mapping
> > > > "bifurcated driver" maps several pages for the queue pairs.
> > > > Others BAR register space maps to a fake page. The BAR mapping go
> > through
> > > > mmap on sockfd. Which is a little different from what UIO/VFIO does.
> > > >
> > > > - PMD
> > > > The PMD will no longer really initialize and configure NIC.
> > > > Instead, it only takes care the queue pair setup, rx_burst and tx_burst.
> > > >
> > > > The patch uses eal '--vdev' parameter to assign netdev iface name and
> > number of
> > > > queue pairs. Here's a example about how to configure the bifurcated
> > driver and
> > > > run DPDK testpmd with bifurcated PMD.
> > > >
> > > > 1. Set promisc mode
> > > > > ifconfig eth0 promisc
> > > >
> > > > 2. Turn on fdir
> > > > > ethtool -K eth0 ntuple on
> > > >
> > > > 3. Setup a flow director rule to distribute packets with source ip
> > > > 0.0.0.0 to rxq No.0
> > > > > ethtool -N eth0 flow-type udp4 src-ip 0.0.0.0 action 0
> > > >
> > > > 4. Run testpmd on netdev 'eth0' with 1 queue pair.
> > > > > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 \
> > > > > --vdev=rte_bifurc,iface=eth0,qpairs=1 -- \
> > > > > -i --rxfreet=32 --txfreet=32 --txrst=32
> > > > Note:
> > > > iface and qpairs arguments above specify the netdev interface name
> > and
> > > > number of qpairs that user space request from the "bifurcated driver"
> > > > respectively.
> > > >
> > > > 5. Setup a flow director rule to distribute packets with source ip
> > > > 1.1.1.1 to rxq No.32. This needs to be done after testpmd starts.
> > > > > ethtool -N eth0 flow-type udp4 src-ip 1.1.1.1 action 32
> > > >
> > > > Below illustrates the detailed changes in this patch set.
> > > >
> > > > eal
> > > > --------
> > > > The first two patches are all about the eal API declaration and Linux
> > version
> > > > definition to support af_packet socket and verbs of bifurcated netdev.
> > > > Those APIs include the verbs like open, bind, (un)map, split/retturn,
> > map_umem.
> > > > And other APIs like set_pci, get_ifinfo and get/put_devargs which help to
> > > > generate pci device from bifurcated netdev and get basic netdev info.
> > > >
> > > > The third patch is used to allow probing driver on the PCIe VDEV created
> > from
> > > > a NIC interface driven by "bifurcated driver". It defines a new flag
> > > > 'RTE_PCI_DRV_BIFURC' used for direct ring access PMD.
> > > >
> > > > librte_bifurc
> > > > ---------------
> > > > The library is used as a VDEV bus driver to scan '--vdev=rte_bifurc' VDEV
> > > > from eal command-line. It generates the PCIe VDEV device ready for
> > further
> > > > driver probe. It maintains the bifurcated device information include
> > sockfd,
> > > > hwaddr, mtu, qpairs, iface_name. It's used for other direct ring access
> > PMD
> > > > to apply for bifurcated device info.
> > > >
> > > > direct ring access PMD
> > > > -------------------------
> > > > The patch provides direct ring access PMD for ixgbe. Comparing to the
> > normal
> > > > PMD ixgbe, it uses 'RTE_PCI_DRV_BIFURC' flag during self registration.
> > > > It mostly reuses the existing PMD ops to avoid re-implementing
> > everything
> > > > from scratch. And it also modifies the rx/tx_queue_setup to allow queue
> > > > setup from any queue offset.
> > > >
> > > > Supported NIC driver
> > > > ========================
> > > >
> > > > The "bifurcated driver" kernel patch only supports "ixgbe" driver at the
> > moment,
> > > > so this RFC patch also provides "ixgbe" PMD via direct-mapped rings as
> > sample.
> > > > The support for 40GE(i40e) will be added in the future.
> > > >
> > > > In addition, for those multi-queues enabled NIC with flow director
> > capability
> > > > to do perform packet classification and distribution, there's no special
> > > > technical gap to provide bifurcated driver approach support.
> > > >
> > > > Limitation
> > > > ============
> > > >
> > > > By using "bifurcated driver", user space only takes over the DMA
> > operation.
> > > > For those NIC configure setting, it's out of control from user space PMD.
> > > > All the NIC setting including add/del filter rules need to be done by
> > > > standard Linux network tools(e.g. ethtool).
> > > > So the feature support really depend on how much are supported by
> > ethtool.
> > > >
> > > >
> > > > Any questions, comments and feedback are welcome.
> > > >
> > > >
> > > > -END-
> > > >
> > > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > > >
> > > > *** BLURB HERE ***
> > > >
> > > > Cunming Liang (6):
> > > > eal: common direct ring access API
> > > > eal: direct ring access support by linux af_packet
> > > > pci: allow VDEV as pci device during device driver probe
> > > > bifurc: add driver to scan bifurcated netdev
> > > > ixgbe: rx/tx queue stop bug fix
> > > > ixgbe: PMD for bifurc ixgbe net device
> > > >
> > > > config/common_linuxapp | 5 +
> > > > lib/Makefile | 1 +
> > > > lib/librte_bifurc/Makefile | 58 +++++
> > > > lib/librte_bifurc/rte_bifurc.c | 284 +++++++++++++++++++++
> > > > lib/librte_bifurc/rte_bifurc.h | 90 +++++++
> > > > lib/librte_eal/common/Makefile | 5 +
> > > > lib/librte_eal/common/include/rte_pci.h | 4 +
> > > > lib/librte_eal/common/include/rte_pci_bifurc.h | 186 ++++++++++++++
> > > > lib/librte_eal/linuxapp/eal/Makefile | 1 +
> > > > lib/librte_eal/linuxapp/eal/eal_pci.c | 42 ++--
> > > > lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c | 336
> > +++++++++++++++++++++++++
> > > > lib/librte_ether/rte_ethdev.c | 3 +-
> > > > lib/librte_pmd_ixgbe/Makefile | 13 +-
> > > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303
> > ++++++++++++++++++++++
> > > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++
> > > > lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 44 +++-
> > > > lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 +
> > > > mk/rte.app.mk | 6 +
> > > > 18 files changed, 1421 insertions(+), 27 deletions(-)
> > > > create mode 100644 lib/librte_bifurc/Makefile
> > > > create mode 100644 lib/librte_bifurc/rte_bifurc.c
> > > > create mode 100644 lib/librte_bifurc/rte_bifurc.h
> > > > create mode 100644 lib/librte_eal/common/include/rte_pci_bifurc.h
> > > > create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
> > > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > > >
> > > > --
> > > > 1.8.1.4
> > > >
> > > >
> > > AIUI, the bifurcated driver hasn't yet been accepted upstream, has it?
> > Given
> > > that, I don't think its wise to pull this in yet ahead of the kernel work, as
> > > there may still be kernel side changes that the user space pmd will have to
> > > adapt to.
> > > Neil
> > >
> > Hence the RFC nature of the patch, I believe. :-) Before the kernel part hits
> > the
> > main kernel tree we can at least discuss the overall direction to be taken for
> > this driver because it's significantly different that any other HW driver.
> >
> > /Bruce
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
2014-11-25 15:02 ` Bruce Richardson
@ 2014-11-25 15:23 ` Zhou, Danny
2014-11-26 10:45 ` Walukiewicz, Miroslaw
0 siblings, 1 reply; 24+ messages in thread
From: Zhou, Danny @ 2014-11-25 15:23 UTC (permalink / raw)
To: Richardson, Bruce, Walukiewicz, Miroslaw; +Cc: dev
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Bruce Richardson
> Sent: Tuesday, November 25, 2014 11:03 PM
> To: Walukiewicz, Miroslaw
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
>
> On Tue, Nov 25, 2014 at 02:57:13PM +0000, Walukiewicz, Miroslaw wrote:
> > Thank you Bruce for explanation of the idea.
>
> Actually, credit goes to Steve Liang, not me, for the explanation. :-)
>
> >
> > I have question regarding TCP SYN packets? Do you have any idea how to share the TCP SYN requests between kernel and
> user-space application?
>
> As I'm giving the credit to Steve, I'll also pass the buck for answering that
> question to him too! :-)
>
> /Bruce
In ixgbe's Rx queuing flow, the SYN filter match stage comes before the Flow Director filter stage. When working in bifurcated driver support mode,
DPDK cannot access the NIC registers except for the ones that are used to rx/tx packets for the assigned rx/tx queue pairs. So basically it really
depends on the user to use ethtool or another interface to set up the SYN filter via the ixgbe bifurcated driver. The user can distribute TCP SYN packets to
rx queues owned by the kernel bifurcated driver or to rx queues owned by DPDK; in the latter case, DPDK can still push them back to the kernel via KNI if DPDK
does not want to use them. If you have a user-space TCP/IP stack on top of DPDK, you can push them to the upper-level stack instead.
> >
> > Regards,
> >
> > Mirek
> >
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Bruce Richardson
> > > Sent: Tuesday, November 25, 2014 3:30 PM
> > > To: Neil Horman
> > > Cc: dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
> > >
> > > On Tue, Nov 25, 2014 at 09:23:16AM -0500, Neil Horman wrote:
> > > > On Tue, Nov 25, 2014 at 10:11:16PM +0800, Cunming Liang wrote:
> > > > >
> > > > > This is a RFC patch set to support "bifurcated driver" in DPDK.
> > > > >
> > > > >
> > > > > What is "bifurcated driver"?
> > > > > ===========================
> > > > >
> > > > > The "bifurcated driver" stands for the kernel NIC driver that supports:
> > > > >
> > > > > 1. on-demand rx/tx queue pairs split-off and assignment to user space
> > > > >
> > > > > 2. direct NIC resource(e.g. rx/tx queue registers) access from user space
> > > > >
> > > > > 3. distributing packets to kernel or user space rx queues by
> > > > > NIC's flow director according to the filter rules
> > > > >
> > > > > Here's the kernel patch set to support.
> > > > > http://comments.gmane.org/gmane.linux.network/333615
> > > > >
> > > > >
> > > > > Usage scenario
> > > > > =================
> > > > >
> > > > > It's well accepted by industry to use DPDK to process fast path packets in
> > > > > user space in a high performance fashion, meanwhile processing slow
> > > path
> > > > > control packets in kernel space is still needed as those packets usually
> > > > > rely on in_kernel TCP/IP stacks and/or socket programming interface.
> > > > >
> > > > > KNI(Kernel NIC Interface) mechanism in DPDK is designed to meet this
> > > > > requirement, with below limitation:
> > > > >
> > > > > 1) Software classifies packets and distributes them to kernel via DPDK
> > > > > software rings, at the cost of significant CPU cycles and memory
> > > bandwidth.
> > > > >
> > > > > 2) Memory copy packets between kernel' socket buffer and mbuf brings
> > > > > significant negative performance impact to KNI performance.
> > > > >
> > > > > The bifurcated driver provides a alternative approach that not only
> > > offloads
> > > > > flow classification and distribution to NIC but also support packets
> > > zero_copy.
> > > > >
> > > > > User can use standard ethtool to add filter rules to the NIC in order to
> > > > > distribute specific flows to the queues only accessed by kernel driver and
> > > > > stack, and add other rules to distribute packets to the queues assigned to
> > > > > user-space.
> > > > >
> > > > > For those rx/tx queue pairs that directly accessed from user space,
> > > > > DPDK takes over the packets rx/tx as well as corresponding DMA
> > > operation
> > > > > for high performance packet I/O.
> > > > >
> > > > >
> > > > > What's the impact and change to DPDK
> > > > > ======================================
> > > > >
> > > > > DPDK usually binds PCIe NIC devices by leveraging kernel' user space
> > > driver
> > > > > mechanism UIO or VFIO to map entire NIC' PCIe I/O space of NIC to user
> > > space.
> > > > > The bifurcated driver PMD talks to a NIC interface using raw socket APIs
> > > and
> > > > > only mmap() limited I/O space (e.g. certain 4K pages) for accessing
> > > involved
> > > > > rx/tx queue pairs. So the impact and changes mainly comes with below:
> > > > >
> > > > > - netdev
> > > > > DPDK needs to create a af_packet socket and bind it to a bifurcated
> > > netdev.
> > > > > The socket fd will be used to request 'queue pairs info',
> > > > > 'split/return queue pairs' and etc. The PCIe device ID, netdev MAC
> > > address,
> > > > > numa info are also from the netdev response.
> > > > >
> > > > > - PCIe device scan and driver probe
> > > > > netdev provides the PCIe device ID information. Refer to the device ID,
> > > > > the correct driver should be used. And for such netdev device, the
> > > creation
> > > > > of PCIe device is no longer from scan but the on-demand assignment.
> > > > >
> > > > > - PCIe BAR mapping
> > > > > "bifurcated driver" maps several pages for the queue pairs.
> > > > > Others BAR register space maps to a fake page. The BAR mapping go
> > > through
> > > > > mmap on sockfd. Which is a little different from what UIO/VFIO does.
> > > > >
> > > > > - PMD
> > > > > The PMD will no longer really initialize and configure NIC.
> > > > > Instead, it only takes care the queue pair setup, rx_burst and tx_burst.
> > > > >
> > > > > The patch uses eal '--vdev' parameter to assign netdev iface name and
> > > number of
> > > > > queue pairs. Here's a example about how to configure the bifurcated
> > > driver and
> > > > > run DPDK testpmd with bifurcated PMD.
> > > > >
> > > > > 1. Set promisc mode
> > > > > > ifconfig eth0 promisc
> > > > >
> > > > > 2. Turn on fdir
> > > > > > ethtool -K eth0 ntuple on
> > > > >
> > > > > 3. Setup a flow director rule to distribute packets with source ip
> > > > > 0.0.0.0 to rxq No.0
> > > > > > ethtool -N eth0 flow-type udp4 src-ip 0.0.0.0 action 0
> > > > >
> > > > > 4. Run testpmd on netdev 'eth0' with 1 queue pair.
> > > > > > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 \
> > > > > > --vdev=rte_bifurc,iface=eth0,qpairs=1 -- \
> > > > > > -i --rxfreet=32 --txfreet=32 --txrst=32
> > > > > Note:
> > > > > iface and qpairs arguments above specify the netdev interface name
> > > and
> > > > > number of qpairs that user space request from the "bifurcated driver"
> > > > > respectively.
> > > > >
> > > > > 5. Setup a flow director rule to distribute packets with source ip
> > > > > 1.1.1.1 to rxq No.32. This needs to be done after testpmd starts.
> > > > > > ethtool -N eth0 flow-type udp4 src-ip 1.1.1.1 action 32
> > > > >
> > > > > Below illustrates the detailed changes in this patch set.
> > > > >
> > > > > eal
> > > > > --------
> > > > > The first two patches are all about the eal API declaration and Linux
> > > version
> > > > > definition to support af_packet socket and verbs of bifurcated netdev.
> > > > > Those APIs include the verbs like open, bind, (un)map, split/retturn,
> > > map_umem.
> > > > > And other APIs like set_pci, get_ifinfo and get/put_devargs which help to
> > > > > generate pci device from bifurcated netdev and get basic netdev info.
> > > > >
> > > > > The third patch is used to allow probing driver on the PCIe VDEV created
> > > from
> > > > > a NIC interface driven by "bifurcated driver". It defines a new flag
> > > > > 'RTE_PCI_DRV_BIFURC' used for direct ring access PMD.
> > > > >
> > > > > librte_bifurc
> > > > > ---------------
> > > > > The library is used as a VDEV bus driver to scan '--vdev=rte_bifurc' VDEV
> > > > > from eal command-line. It generates the PCIe VDEV device ready for
> > > further
> > > > > driver probe. It maintains the bifurcated device information include
> > > sockfd,
> > > > > hwaddr, mtu, qpairs, iface_name. It's used for other direct ring access
> > > PMD
> > > > > to apply for bifurcated device info.
> > > > >
> > > > > direct ring access PMD
> > > > > -------------------------
> > > > > The patch provides direct ring access PMD for ixgbe. Comparing to the
> > > normal
> > > > > PMD ixgbe, it uses 'RTE_PCI_DRV_BIFURC' flag during self registration.
> > > > > It mostly reuses the existing PMD ops to avoid re-implementing
> > > everything
> > > > > from scratch. And it also modifies the rx/tx_queue_setup to allow queue
> > > > > setup from any queue offset.
> > > > >
> > > > > Supported NIC driver
> > > > > ========================
> > > > >
> > > > > The "bifurcated driver" kernel patch only supports "ixgbe" driver at the
> > > moment,
> > > > > so this RFC patch also provides "ixgbe" PMD via direct-mapped rings as
> > > sample.
> > > > > The support for 40GE(i40e) will be added in the future.
> > > > >
> > > > > In addition, for those multi-queues enabled NIC with flow director
> > > capability
> > > > > to do perform packet classification and distribution, there's no special
> > > > > technical gap to provide bifurcated driver approach support.
> > > > >
> > > > > Limitation
> > > > > ============
> > > > >
> > > > > By using "bifurcated driver", user space only takes over the DMA
> > > operation.
> > > > > For those NIC configure setting, it's out of control from user space PMD.
> > > > > All the NIC setting including add/del filter rules need to be done by
> > > > > standard Linux network tools(e.g. ethtool).
> > > > > So the feature support really depend on how much are supported by
> > > ethtool.
> > > > >
> > > > >
> > > > > Any questions, comments and feedback are welcome.
> > > > >
> > > > >
> > > > > -END-
> > > > >
> > > > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > > > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > > > >
> > > > > *** BLURB HERE ***
> > > > >
> > > > > Cunming Liang (6):
> > > > > eal: common direct ring access API
> > > > > eal: direct ring access support by linux af_packet
> > > > > pci: allow VDEV as pci device during device driver probe
> > > > > bifurc: add driver to scan bifurcated netdev
> > > > > ixgbe: rx/tx queue stop bug fix
> > > > > ixgbe: PMD for bifurc ixgbe net device
> > > > >
> > > > > config/common_linuxapp | 5 +
> > > > > lib/Makefile | 1 +
> > > > > lib/librte_bifurc/Makefile | 58 +++++
> > > > > lib/librte_bifurc/rte_bifurc.c | 284 +++++++++++++++++++++
> > > > > lib/librte_bifurc/rte_bifurc.h | 90 +++++++
> > > > > lib/librte_eal/common/Makefile | 5 +
> > > > > lib/librte_eal/common/include/rte_pci.h | 4 +
> > > > > lib/librte_eal/common/include/rte_pci_bifurc.h | 186 ++++++++++++++
> > > > > lib/librte_eal/linuxapp/eal/Makefile | 1 +
> > > > > lib/librte_eal/linuxapp/eal/eal_pci.c | 42 ++--
> > > > > lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c | 336
> > > +++++++++++++++++++++++++
> > > > > lib/librte_ether/rte_ethdev.c | 3 +-
> > > > > lib/librte_pmd_ixgbe/Makefile | 13 +-
> > > > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303
> > > ++++++++++++++++++++++
> > > > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++
> > > > > lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 44 +++-
> > > > > lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 +
> > > > > mk/rte.app.mk | 6 +
> > > > > 18 files changed, 1421 insertions(+), 27 deletions(-)
> > > > > create mode 100644 lib/librte_bifurc/Makefile
> > > > > create mode 100644 lib/librte_bifurc/rte_bifurc.c
> > > > > create mode 100644 lib/librte_bifurc/rte_bifurc.h
> > > > > create mode 100644 lib/librte_eal/common/include/rte_pci_bifurc.h
> > > > > create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
> > > > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > > > >
> > > > > --
> > > > > 1.8.1.4
> > > > >
> > > > >
> > > > AIUI, the bifurcated driver hasn't yet been accepted upstream, has it?
> > > Given
> > > > that, I don't think its wise to pull this in yet ahead of the kernel work, as
> > > > there may still be kernel side changes that the user space pmd will have to
> > > > adapt to.
> > > > Neil
> > > >
> > > Hence the RFC nature of the patch, I believe. :-) Before the kernel part hits
> > > the
> > > main kernel tree we can at least discuss the overall direction to be taken for
> > > this driver because it's significantly different that any other HW driver.
> > >
> > > /Bruce
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
2014-11-25 15:23 ` Zhou, Danny
@ 2014-11-26 10:45 ` Walukiewicz, Miroslaw
2014-11-26 12:22 ` Zhou, Danny
0 siblings, 1 reply; 24+ messages in thread
From: Walukiewicz, Miroslaw @ 2014-11-26 10:45 UTC (permalink / raw)
To: Zhou, Danny, Richardson, Bruce; +Cc: dev
Thank you for the explanation.
I still have a few questions regarding the setup flow:
1. Why do we need this step:
> 3. Setup a flow director rule to distribute packets with source ip
> > > > > > 0.0.0.0 to rxq No.0
> > > > > > > ethtool -N eth0 flow-type udp4 src-ip 0.0.0.0 action 0
2. You presented the filter setup for receiving all udp4 packets on specific queue
> > > > > > 5. Setup a flow director rule to distribute packets with source ip
> > > > > > 1.1.1.1 to rxq No.32. This needs to be done after testpmd starts.
> > > > > > > ethtool -N eth0 flow-type udp4 src-ip 1.1.1.1 action 32
How can flow director be configured to receive all packets with dst-ip = 1.1.1.1 on qpair=32?
Will TCP SYN packets be caught by such a filter setup?
3. Is it possible to set up a rule like:
Forward all TCPv4 rx packets with dst-ip =1.1.1.1 and TCP port 2222 to qpair=32 including SYN packets?
3. In your application example you present that qpair number (32) is known before start of application
> > > > > > > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 \
> > > > > > > --vdev=rte_bifurc,iface=eth0,qpairs=1 -- \
> > > > > > > -i --rxfreet=32 --txfreet=32 --txrst=32
Is dynamic queue allocation possible? I am asking about the API.
I mean dynamically attaching and detaching queues at the application level, without specifying the numbers on the command line.
4. Is it possible to create a perfect-match rule that directs the packets to a specific queue?
I mean here a rule like:
Forward all TCPv4 rx packets with dst-ip=1.1.1.1 src-ip=2.2.2.2 dst-port=2222 src-port=1234 to queue 33
Regards,
Mirek
> -----Original Message-----
> From: Zhou, Danny
> Sent: Tuesday, November 25, 2014 4:23 PM
> To: Richardson, Bruce; Walukiewicz, Miroslaw
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
>
>
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Bruce Richardson
> > Sent: Tuesday, November 25, 2014 11:03 PM
> > To: Walukiewicz, Miroslaw
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
> >
> > On Tue, Nov 25, 2014 at 02:57:13PM +0000, Walukiewicz, Miroslaw wrote:
> > > Thank you Bruce for explanation of the idea.
> >
> > Actually, credit goes to Steve Liang, not me, for the explanation. :-)
> >
> > >
> > > I have question regarding TCP SYN packets? Do you have any idea how to
> share the TCP SYN requests between kernel and
> > user-space application?
> >
> > As I'm giving the credit to Steve, I'll also pass the buck for answering that
> > question to him too! :-)
> >
> > /Bruce
>
> On ixgbe' Rx queuing flow, match SYN filter stage is prior to Flow Director
> filter stage. When working at bifurcated driver support mode,
> DPDK cannot access those NIC registers except for the ones that are used to
> rx/tx packets for assigned rx/tx queue pairs. So basically it really
> depends on user to use ethtool or other interface to setup SYN filter via
> ixgbe bifurcated driver. User can distribute TCP SYN packets to
> kernel bifurcated driver owned rx queues or DPDK owned rx queues, for the
> latter case, DPDK can still push them back to kernel via KNI if DPDK
> does not want to use them. If you have a user space TCP/IP stacks on top of
> DPDK, you can push them to the upper level stack rather instead.
>
> > >
> > > Regards,
> > >
> > > Mirek
> > >
> > > > -----Original Message-----
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Bruce
> Richardson
> > > > Sent: Tuesday, November 25, 2014 3:30 PM
> > > > To: Neil Horman
> > > > Cc: dev@dpdk.org
> > > > Subject: Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated
> driver
> > > >
> > > > On Tue, Nov 25, 2014 at 09:23:16AM -0500, Neil Horman wrote:
> > > > > On Tue, Nov 25, 2014 at 10:11:16PM +0800, Cunming Liang wrote:
> > > > > >
> > > > > > This is a RFC patch set to support "bifurcated driver" in DPDK.
> > > > > >
> > > > > >
> > > > > > What is "bifurcated driver"?
> > > > > > ===========================
> > > > > >
> > > > > > The "bifurcated driver" stands for the kernel NIC driver that
> supports:
> > > > > >
> > > > > > 1. on-demand rx/tx queue pairs split-off and assignment to user
> space
> > > > > >
> > > > > > 2. direct NIC resource(e.g. rx/tx queue registers) access from user
> space
> > > > > >
> > > > > > 3. distributing packets to kernel or user space rx queues by
> > > > > > NIC's flow director according to the filter rules
> > > > > >
> > > > > > Here's the kernel patch set to support.
> > > > > > http://comments.gmane.org/gmane.linux.network/333615
> > > > > >
> > > > > >
> > > > > > Usage scenario
> > > > > > =================
> > > > > >
> > > > > > It's well accepted by industry to use DPDK to process fast path
> packets in
> > > > > > user space in a high performance fashion, meanwhile processing
> slow
> > > > path
> > > > > > control packets in kernel space is still needed as those packets
> usually
> > > > > > rely on in_kernel TCP/IP stacks and/or socket programming
> interface.
> > > > > >
> > > > > > KNI(Kernel NIC Interface) mechanism in DPDK is designed to meet
> this
> > > > > > requirement, with below limitation:
> > > > > >
> > > > > > 1) Software classifies packets and distributes them to kernel via
> DPDK
> > > > > > software rings, at the cost of significant CPU cycles and memory
> > > > bandwidth.
> > > > > >
> > > > > > 2) Memory copy packets between kernel' socket buffer and mbuf
> brings
> > > > > > significant negative performance impact to KNI performance.
> > > > > >
> > > > > > The bifurcated driver provides a alternative approach that not only
> > > > offloads
> > > > > > flow classification and distribution to NIC but also support packets
> > > > zero_copy.
> > > > > >
> > > > > > User can use standard ethtool to add filter rules to the NIC in order
> to
> > > > > > distribute specific flows to the queues only accessed by kernel
> driver and
> > > > > > stack, and add other rules to distribute packets to the queues
> assigned to
> > > > > > user-space.
> > > > > >
> > > > > > For those rx/tx queue pairs that directly accessed from user space,
> > > > > > DPDK takes over the packets rx/tx as well as corresponding DMA
> > > > operation
> > > > > > for high performance packet I/O.
> > > > > >
> > > > > >
> > > > > > What's the impact and change to DPDK
> > > > > > ======================================
> > > > > >
> > > > > > DPDK usually binds PCIe NIC devices by leveraging kernel' user
> space
> > > > driver
> > > > > > mechanism UIO or VFIO to map entire NIC' PCIe I/O space of NIC to
> user
> > > > space.
> > > > > > The bifurcated driver PMD talks to a NIC interface using raw socket
> APIs
> > > > and
> > > > > > only mmap() limited I/O space (e.g. certain 4K pages) for accessing
> > > > involved
> > > > > > rx/tx queue pairs. So the impact and changes mainly comes with
> below:
> > > > > >
> > > > > > - netdev
> > > > > > DPDK needs to create a af_packet socket and bind it to a
> bifurcated
> > > > netdev.
> > > > > > The socket fd will be used to request 'queue pairs info',
> > > > > > 'split/return queue pairs' and etc. The PCIe device ID, netdev MAC
> > > > address,
> > > > > > numa info are also from the netdev response.
> > > > > >
> > > > > > - PCIe device scan and driver probe
> > > > > > netdev provides the PCIe device ID information. Refer to the
> device ID,
> > > > > > the correct driver should be used. And for such netdev device,
> the
> > > > creation
> > > > > > of PCIe device is no longer from scan but the on-demand
> assignment.
> > > > > >
> > > > > > - PCIe BAR mapping
> > > > > > "bifurcated driver" maps several pages for the queue pairs.
> > > > > > Others BAR register space maps to a fake page. The BAR mapping
> go
> > > > through
> > > > > > mmap on sockfd. Which is a little different from what UIO/VFIO
> does.
> > > > > >
> > > > > > - PMD
> > > > > > The PMD will no longer really initialize and configure NIC.
> > > > > > Instead, it only takes care the queue pair setup, rx_burst and
> tx_burst.
> > > > > >
> > > > > > The patch uses eal '--vdev' parameter to assign netdev iface name
> and
> > > > number of
> > > > > > queue pairs. Here's a example about how to configure the
> bifurcated
> > > > driver and
> > > > > > run DPDK testpmd with bifurcated PMD.
> > > > > >
> > > > > > 1. Set promisc mode
> > > > > > > ifconfig eth0 promisc
> > > > > >
> > > > > > 2. Turn on fdir
> > > > > > > ethtool -K eth0 ntuple on
> > > > > >
> > > > > > 3. Setup a flow director rule to distribute packets with source ip
> > > > > > 0.0.0.0 to rxq No.0
> > > > > > > ethtool -N eth0 flow-type udp4 src-ip 0.0.0.0 action 0
> > > > > >
> > > > > > 4. Run testpmd on netdev 'eth0' with 1 queue pair.
> > > > > > > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 \
> > > > > > > --vdev=rte_bifurc,iface=eth0,qpairs=1 -- \
> > > > > > > -i --rxfreet=32 --txfreet=32 --txrst=32
> > > > > > Note:
> > > > > > iface and qpairs arguments above specify the netdev interface
> name
> > > > and
> > > > > > number of qpairs that user space request from the "bifurcated
> driver"
> > > > > > respectively.
> > > > > >
> > > > > > 5. Setup a flow director rule to distribute packets with source ip
> > > > > > 1.1.1.1 to rxq No.32. This needs to be done after testpmd starts.
> > > > > > > ethtool -N eth0 flow-type udp4 src-ip 1.1.1.1 action 32
> > > > > >
> > > > > > Below illustrates the detailed changes in this patch set.
> > > > > >
> > > > > > eal
> > > > > > --------
> > > > > > The first two patches are all about the eal API declaration and Linux
> > > > version
> > > > > > definition to support af_packet socket and verbs of bifurcated
> netdev.
> > > > > > Those APIs include the verbs like open, bind, (un)map, split/retturn,
> > > > map_umem.
> > > > > > And other APIs like set_pci, get_ifinfo and get/put_devargs which
> help to
> > > > > > generate pci device from bifurcated netdev and get basic netdev
> info.
> > > > > >
> > > > > > The third patch is used to allow probing driver on the PCIe VDEV
> created
> > > > from
> > > > > > a NIC interface driven by "bifurcated driver". It defines a new flag
> > > > > > 'RTE_PCI_DRV_BIFURC' used for direct ring access PMD.
> > > > > >
> > > > > > librte_bifurc
> > > > > > ---------------
> > > > > > The library is used as a VDEV bus driver to scan '--vdev=rte_bifurc'
> VDEV
> > > > > > from eal command-line. It generates the PCIe VDEV device ready
> for
> > > > further
> > > > > > driver probe. It maintains the bifurcated device information include
> > > > sockfd,
> > > > > > hwaddr, mtu, qpairs, iface_name. It's used for other direct ring
> access
> > > > PMD
> > > > > > to apply for bifurcated device info.
> > > > > >
> > > > > > direct ring access PMD
> > > > > > -------------------------
> > > > > > The patch provides direct ring access PMD for ixgbe. Comparing to
> the
> > > > normal
> > > > > > PMD ixgbe, it uses 'RTE_PCI_DRV_BIFURC' flag during self
> registration.
> > > > > > It mostly reuses the existing PMD ops to avoid re-implementing
> > > > everything
> > > > > > from scratch. And it also modifies the rx/tx_queue_setup to allow
> queue
> > > > > > setup from any queue offset.
> > > > > >
> > > > > > Supported NIC driver
> > > > > > ========================
> > > > > >
> > > > > > The "bifurcated driver" kernel patch only supports "ixgbe" driver at
> the
> > > > moment,
> > > > > > so this RFC patch also provides "ixgbe" PMD via direct-mapped rings
> as
> > > > sample.
> > > > > > The support for 40GE(i40e) will be added in the future.
> > > > > >
> > > > > > In addition, for those multi-queues enabled NIC with flow director
> > > > capability
> > > > > > to do perform packet classification and distribution, there's no
> special
> > > > > > technical gap to provide bifurcated driver approach support.
> > > > > >
> > > > > > Limitation
> > > > > > ============
> > > > > >
> > > > > > By using "bifurcated driver", user space only takes over the DMA
> > > > operation.
> > > > > > For those NIC configure setting, it's out of control from user space
> PMD.
> > > > > > All the NIC setting including add/del filter rules need to be done by
> > > > > > standard Linux network tools(e.g. ethtool).
> > > > > > So the feature support really depend on how much are supported
> by
> > > > ethtool.
> > > > > >
> > > > > >
> > > > > > Any questions, comments and feedback are welcome.
> > > > > >
> > > > > >
> > > > > > -END-
> > > > > >
> > > > > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > > > > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > > > > >
> > > > > > *** BLURB HERE ***
> > > > > >
> > > > > > Cunming Liang (6):
> > > > > > eal: common direct ring access API
> > > > > > eal: direct ring access support by linux af_packet
> > > > > > pci: allow VDEV as pci device during device driver probe
> > > > > > bifurc: add driver to scan bifurcated netdev
> > > > > > ixgbe: rx/tx queue stop bug fix
> > > > > > ixgbe: PMD for bifurc ixgbe net device
> > > > > >
> > > > > > config/common_linuxapp | 5 +
> > > > > > lib/Makefile | 1 +
> > > > > > lib/librte_bifurc/Makefile | 58 +++++
> > > > > > lib/librte_bifurc/rte_bifurc.c | 284
> +++++++++++++++++++++
> > > > > > lib/librte_bifurc/rte_bifurc.h | 90 +++++++
> > > > > > lib/librte_eal/common/Makefile | 5 +
> > > > > > lib/librte_eal/common/include/rte_pci.h | 4 +
> > > > > > lib/librte_eal/common/include/rte_pci_bifurc.h | 186
> ++++++++++++++
> > > > > > lib/librte_eal/linuxapp/eal/Makefile | 1 +
> > > > > > lib/librte_eal/linuxapp/eal/eal_pci.c | 42 ++--
> > > > > > lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c | 336
> > > > +++++++++++++++++++++++++
> > > > > > lib/librte_ether/rte_ethdev.c | 3 +-
> > > > > > lib/librte_pmd_ixgbe/Makefile | 13 +-
> > > > > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303
> > > > ++++++++++++++++++++++
> > > > > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++
> > > > > > lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 44 +++-
> > > > > > lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 +
> > > > > > mk/rte.app.mk | 6 +
> > > > > > 18 files changed, 1421 insertions(+), 27 deletions(-)
> > > > > > create mode 100644 lib/librte_bifurc/Makefile
> > > > > > create mode 100644 lib/librte_bifurc/rte_bifurc.c
> > > > > > create mode 100644 lib/librte_bifurc/rte_bifurc.h
> > > > > > create mode 100644
> lib/librte_eal/common/include/rte_pci_bifurc.h
> > > > > > create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
> > > > > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > > > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > > > > >
> > > > > > --
> > > > > > 1.8.1.4
> > > > > >
> > > > > >
> > > > > AIUI, the bifurcated driver hasn't yet been accepted upstream, has it?
> > > > Given
> > > > > that, I don't think its wise to pull this in yet ahead of the kernel work,
> as
> > > > > there may still be kernel side changes that the user space pmd will
> have to
> > > > > adapt to.
> > > > > Neil
> > > > >
> > > > Hence the RFC nature of the patch, I believe. :-) Before the kernel part
> hits
> > > > the
> > > > main kernel tree we can at least discuss the overall direction to be
> taken for
> > > > this driver because it's significantly different that any other HW driver.
> > > >
> > > > /Bruce
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
2014-11-26 10:45 ` Walukiewicz, Miroslaw
@ 2014-11-26 12:22 ` Zhou, Danny
0 siblings, 0 replies; 24+ messages in thread
From: Zhou, Danny @ 2014-11-26 12:22 UTC (permalink / raw)
To: Walukiewicz, Miroslaw, Richardson, Bruce; +Cc: dev
> -----Original Message-----
> From: Walukiewicz, Miroslaw
> Sent: Wednesday, November 26, 2014 6:45 PM
> To: Zhou, Danny; Richardson, Bruce
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
>
> Thank you for explanation.
>
> I have a few questions regarding the setup flow yet:
>
> 1. Why we need this step:
> > 3. Setup a flow director rule to distribute packets with source ip
> > > > > > > 0.0.0.0 to rxq No.0
> > > > > > > > ethtool -N eth0 flow-type udp4 src-ip 0.0.0.0 action 0
>
DZ: By default, the ixgbe kernel driver uses 32 (0-31) rx/tx queue pairs. The above example sets up a filter
to route a UDP flow with src_ip 0.0.0.0 to queue No.0, which is used by the kernel driver's rx/tx routine.
>
> 2. You presented the filter setup for receiving all udp4 packets on specific queue
> > > > > > > 5. Setup a flow director rule to distribute packets with source ip
> > > > > > > 1.1.1.1 to rxq No.32. This needs to be done after testpmd starts.
> > > > > > > > ethtool -N eth0 flow-type udp4 src-ip 1.1.1.1 action 32
>
> How to configure flow director to receive all packets with dst-ip = 1.1.1.1 on qpair=32?
DZ: You can certainly do that with an ethtool command line such as "ethtool -N eth0 flow-type udp4 dst-ip 1.1.1.1 action 32".
> Will TCP SYN packets caught by such filter setup?
DZ: Unfortunately, unlike DPDK, which provides the ixgbe_add_syn_filter() API to program the SYN Packet Queue Filter register, the
in-kernel ixgbe driver does not touch that register, although I have seen the ixgbe 3.18.7 driver hard-code a value in that register.
In either case, there is no easy way to configure it through the ixgbe bifurcated driver. In bifurcated mode, DPDK cannot access that register.
> 3. Do we have a possibility to setup a rule like:
> Forward all TCPv4 rx packets with dst-ip =1.1.1.1 and TCP port 2222 to qpair=32 including SYN packets?
DZ: Yes, ethtool and flow director support that. I will send you a separate email about ethtool usage for flow director configuration.
> 3. In your application example you present that qpair number (32) is known before start of application
> > > > > > > > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 \
> > > > > > > > --vdev=rte_bifurc,iface=eth0,qpairs=1 -- \
> > > > > > > > -i --rxfreet=32 --txfreet=32 --txrst=32
>
> Is there a possibility to dynamic queue allocation? I ask about API.
> I mean dynamic attaching and detaching queue from application level and not specifying the numbers in the command line.
>
DZ: The example is just for experimentation. When DPDK requests queue pairs from the ixgbe bifurcated driver, it only specifies the number of qpairs; the kernel
driver returns the absolute index of each assigned qpair to the application. The application can then use those indices to invoke the ethtool command line, or
directly issue an IOCTL to the bifurcated driver, to set up flow director (FD).
> 4. Is there a possibility to create a rule with perfect match and directing the packets to the specific queue.
> I mean here a rule like:
> Forward all TCPv4 rx packets with dst-ip=1.1.1.1 src-ip=2.2.2.2 dst-port=2222 src-port=1234 to queue 33
>
DZ: Yes, of course you can.
> Regards,
>
> Mirek
>
> > -----Original Message-----
> > From: Zhou, Danny
> > Sent: Tuesday, November 25, 2014 4:23 PM
> > To: Richardson, Bruce; Walukiewicz, Miroslaw
> > Cc: dev@dpdk.org
> > Subject: RE: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
> >
> >
> >
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Bruce Richardson
> > > Sent: Tuesday, November 25, 2014 11:03 PM
> > > To: Walukiewicz, Miroslaw
> > > Cc: dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
> > >
> > > On Tue, Nov 25, 2014 at 02:57:13PM +0000, Walukiewicz, Miroslaw wrote:
> > > > Thank you Bruce for explanation of the idea.
> > >
> > > Actually, credit goes to Steve Liang, not me, for the explanation. :-)
> > >
> > > >
> > > > I have question regarding TCP SYN packets? Do you have any idea how to
> > share the TCP SYN requests between kernel and
> > > user-space application?
> > >
> > > As I'm giving the credit to Steve, I'll also pass the buck for answering that
> > > question to him too! :-)
> > >
> > > /Bruce
> >
> > On ixgbe' Rx queuing flow, match SYN filter stage is prior to Flow Director
> > filter stage. When working at bifurcated driver support mode,
> > DPDK cannot access those NIC registers except for the ones that are used to
> > rx/tx packets for assigned rx/tx queue pairs. So basically it really
> > depends on user to use ethtool or other interface to setup SYN filter via
> > ixgbe bifurcated driver. User can distribute TCP SYN packets to
> > kernel bifurcated driver owned rx queues or DPDK owned rx queues, for the
> > latter case, DPDK can still push them back to kernel via KNI if DPDK
> > does not want to use them. If you have a user space TCP/IP stacks on top of
> > DPDK, you can push them to the upper level stack rather instead.
> >
> > > >
> > > > Regards,
> > > >
> > > > Mirek
> > > >
> > > > > -----Original Message-----
> > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Bruce
> > Richardson
> > > > > Sent: Tuesday, November 25, 2014 3:30 PM
> > > > > To: Neil Horman
> > > > > Cc: dev@dpdk.org
> > > > > Subject: Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated
> > driver
> > > > >
> > > > > On Tue, Nov 25, 2014 at 09:23:16AM -0500, Neil Horman wrote:
> > > > > > On Tue, Nov 25, 2014 at 10:11:16PM +0800, Cunming Liang wrote:
> > > > > > >
> > > > > > > This is a RFC patch set to support "bifurcated driver" in DPDK.
> > > > > > >
> > > > > > >
> > > > > > > What is "bifurcated driver"?
> > > > > > > ===========================
> > > > > > >
> > > > > > > The "bifurcated driver" stands for the kernel NIC driver that
> > supports:
> > > > > > >
> > > > > > > 1. on-demand rx/tx queue pairs split-off and assignment to user
> > space
> > > > > > >
> > > > > > > 2. direct NIC resource(e.g. rx/tx queue registers) access from user
> > space
> > > > > > >
> > > > > > > 3. distributing packets to kernel or user space rx queues by
> > > > > > > NIC's flow director according to the filter rules
> > > > > > >
> > > > > > > Here's the kernel patch set to support.
> > > > > > > http://comments.gmane.org/gmane.linux.network/333615
> > > > > > >
> > > > > > >
> > > > > > > Usage scenario
> > > > > > > =================
> > > > > > >
> > > > > > > It's well accepted by industry to use DPDK to process fast path
> > packets in
> > > > > > > user space in a high performance fashion, meanwhile processing
> > slow
> > > > > path
> > > > > > > control packets in kernel space is still needed as those packets
> > usually
> > > > > > > rely on in_kernel TCP/IP stacks and/or socket programming
> > interface.
> > > > > > >
> > > > > > > KNI(Kernel NIC Interface) mechanism in DPDK is designed to meet
> > this
> > > > > > > requirement, with below limitation:
> > > > > > >
> > > > > > > 1) Software classifies packets and distributes them to kernel via
> > DPDK
> > > > > > > software rings, at the cost of significant CPU cycles and memory
> > > > > bandwidth.
> > > > > > >
> > > > > > > 2) Memory copy packets between kernel' socket buffer and mbuf
> > brings
> > > > > > > significant negative performance impact to KNI performance.
> > > > > > >
> > > > > > > The bifurcated driver provides a alternative approach that not only
> > > > > offloads
> > > > > > > flow classification and distribution to NIC but also support packets
> > > > > zero_copy.
> > > > > > >
> > > > > > > User can use standard ethtool to add filter rules to the NIC in order
> > to
> > > > > > > distribute specific flows to the queues only accessed by kernel
> > driver and
> > > > > > > stack, and add other rules to distribute packets to the queues
> > assigned to
> > > > > > > user-space.
> > > > > > >
> > > > > > > For those rx/tx queue pairs that directly accessed from user space,
> > > > > > > DPDK takes over the packets rx/tx as well as corresponding DMA
> > > > > operation
> > > > > > > for high performance packet I/O.
> > > > > > >
> > > > > > >
> > > > > > > What's the impact and change to DPDK
> > > > > > > ======================================
> > > > > > >
> > > > > > > DPDK usually binds PCIe NIC devices by leveraging kernel' user
> > space
> > > > > driver
> > > > > > > mechanism UIO or VFIO to map entire NIC' PCIe I/O space of NIC to
> > user
> > > > > space.
> > > > > > > The bifurcated driver PMD talks to a NIC interface using raw socket
> > APIs
> > > > > and
> > > > > > > only mmap() limited I/O space (e.g. certain 4K pages) for accessing
> > > > > involved
> > > > > > > rx/tx queue pairs. So the impact and changes mainly comes with
> > below:
> > > > > > >
> > > > > > > - netdev
> > > > > > > DPDK needs to create a af_packet socket and bind it to a
> > bifurcated
> > > > > netdev.
> > > > > > > The socket fd will be used to request 'queue pairs info',
> > > > > > > 'split/return queue pairs' and etc. The PCIe device ID, netdev MAC
> > > > > address,
> > > > > > > numa info are also from the netdev response.
> > > > > > >
> > > > > > > - PCIe device scan and driver probe
> > > > > > > netdev provides the PCIe device ID information. Refer to the
> > device ID,
> > > > > > > the correct driver should be used. And for such netdev device,
> > the
> > > > > creation
> > > > > > > of PCIe device is no longer from scan but the on-demand
> > assignment.
> > > > > > >
> > > > > > > - PCIe BAR mapping
> > > > > > > "bifurcated driver" maps several pages for the queue pairs.
> > > > > > > Others BAR register space maps to a fake page. The BAR mapping
> > go
> > > > > through
> > > > > > > mmap on sockfd. Which is a little different from what UIO/VFIO
> > does.
> > > > > > >
> > > > > > > - PMD
> > > > > > > The PMD will no longer really initialize and configure NIC.
> > > > > > > Instead, it only takes care the queue pair setup, rx_burst and
> > tx_burst.
> > > > > > >
> > > > > > > The patch uses eal '--vdev' parameter to assign netdev iface name
> > and
> > > > > number of
> > > > > > > queue pairs. Here's a example about how to configure the
> > bifurcated
> > > > > driver and
> > > > > > > run DPDK testpmd with bifurcated PMD.
> > > > > > >
> > > > > > > 1. Set promisc mode
> > > > > > > > ifconfig eth0 promisc
> > > > > > >
> > > > > > > 2. Turn on fdir
> > > > > > > > ethtool -K eth0 ntuple on
> > > > > > >
> > > > > > > 3. Setup a flow director rule to distribute packets with source ip
> > > > > > > 0.0.0.0 to rxq No.0
> > > > > > > > ethtool -N eth0 flow-type udp4 src-ip 0.0.0.0 action 0
> > > > > > >
> > > > > > > 4. Run testpmd on netdev 'eth0' with 1 queue pair.
> > > > > > > > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 \
> > > > > > > > --vdev=rte_bifurc,iface=eth0,qpairs=1 -- \
> > > > > > > > -i --rxfreet=32 --txfreet=32 --txrst=32
> > > > > > > Note:
> > > > > > > iface and qpairs arguments above specify the netdev interface
> > name
> > > > > and
> > > > > > > number of qpairs that user space request from the "bifurcated
> > driver"
> > > > > > > respectively.
> > > > > > >
> > > > > > > 5. Setup a flow director rule to distribute packets with source ip
> > > > > > > 1.1.1.1 to rxq No.32. This needs to be done after testpmd starts.
> > > > > > > > ethtool -N eth0 flow-type udp4 src-ip 1.1.1.1 action 32
> > > > > > >
> > > > > > > Below illustrates the detailed changes in this patch set.
> > > > > > >
> > > > > > > eal
> > > > > > > --------
> > > > > > > The first two patches are all about the eal API declaration and Linux
> > > > > version
> > > > > > > definition to support af_packet socket and verbs of bifurcated
> > netdev.
> > > > > > > Those APIs include the verbs like open, bind, (un)map, split/retturn,
> > > > > map_umem.
> > > > > > > And other APIs like set_pci, get_ifinfo and get/put_devargs which
> > help to
> > > > > > > generate pci device from bifurcated netdev and get basic netdev
> > info.
> > > > > > >
> > > > > > > The third patch is used to allow probing driver on the PCIe VDEV
> > created
> > > > > from
> > > > > > > a NIC interface driven by "bifurcated driver". It defines a new flag
> > > > > > > 'RTE_PCI_DRV_BIFURC' used for direct ring access PMD.
> > > > > > >
> > > > > > > librte_bifurc
> > > > > > > ---------------
> > > > > > > The library is used as a VDEV bus driver to scan '--vdev=rte_bifurc'
> > VDEV
> > > > > > > from eal command-line. It generates the PCIe VDEV device ready
> > for
> > > > > further
> > > > > > > driver probe. It maintains the bifurcated device information include
> > > > > sockfd,
> > > > > > > hwaddr, mtu, qpairs, iface_name. It's used for other direct ring
> > access
> > > > > PMD
> > > > > > > to apply for bifurcated device info.
> > > > > > >
> > > > > > > direct ring access PMD
> > > > > > > -------------------------
> > > > > > > The patch provides direct ring access PMD for ixgbe. Comparing to
> > the
> > > > > normal
> > > > > > > PMD ixgbe, it uses 'RTE_PCI_DRV_BIFURC' flag during self
> > registration.
> > > > > > > It mostly reuses the existing PMD ops to avoid re-implementing
> > > > > everything
> > > > > > > from scratch. And it also modifies the rx/tx_queue_setup to allow
> > queue
> > > > > > > setup from any queue offset.
> > > > > > >
> > > > > > > Supported NIC driver
> > > > > > > ========================
> > > > > > >
> > > > > > > The "bifurcated driver" kernel patch only supports "ixgbe" driver at
> > the
> > > > > moment,
> > > > > > > so this RFC patch also provides "ixgbe" PMD via direct-mapped rings
> > as
> > > > > sample.
> > > > > > > The support for 40GE(i40e) will be added in the future.
> > > > > > >
> > > > > > > In addition, for those multi-queues enabled NIC with flow director
> > > > > capability
> > > > > > > to do perform packet classification and distribution, there's no
> > special
> > > > > > > technical gap to provide bifurcated driver approach support.
> > > > > > >
> > > > > > > Limitation
> > > > > > > ============
> > > > > > >
> > > > > > > By using "bifurcated driver", user space only takes over the DMA
> > > > > operation.
> > > > > > > For those NIC configure setting, it's out of control from user space
> > PMD.
> > > > > > > All the NIC setting including add/del filter rules need to be done by
> > > > > > > standard Linux network tools(e.g. ethtool).
> > > > > > > So the feature support really depend on how much are supported
> > by
> > > > > ethtool.
> > > > > > >
> > > > > > >
> > > > > > > Any questions, comments and feedback are welcome.
> > > > > > >
> > > > > > >
> > > > > > > -END-
> > > > > > >
> > > > > > > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > > > > > > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > > > > > >
> > > > > > > *** BLURB HERE ***
> > > > > > >
> > > > > > > Cunming Liang (6):
> > > > > > > eal: common direct ring access API
> > > > > > > eal: direct ring access support by linux af_packet
> > > > > > > pci: allow VDEV as pci device during device driver probe
> > > > > > > bifurc: add driver to scan bifurcated netdev
> > > > > > > ixgbe: rx/tx queue stop bug fix
> > > > > > > ixgbe: PMD for bifurc ixgbe net device
> > > > > > >
> > > > > > > config/common_linuxapp | 5 +
> > > > > > > lib/Makefile | 1 +
> > > > > > > lib/librte_bifurc/Makefile | 58 +++++
> > > > > > > lib/librte_bifurc/rte_bifurc.c | 284
> > +++++++++++++++++++++
> > > > > > > lib/librte_bifurc/rte_bifurc.h | 90 +++++++
> > > > > > > lib/librte_eal/common/Makefile | 5 +
> > > > > > > lib/librte_eal/common/include/rte_pci.h | 4 +
> > > > > > > lib/librte_eal/common/include/rte_pci_bifurc.h | 186
> > ++++++++++++++
> > > > > > > lib/librte_eal/linuxapp/eal/Makefile | 1 +
> > > > > > > lib/librte_eal/linuxapp/eal/eal_pci.c | 42 ++--
> > > > > > > lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c | 336
> > > > > +++++++++++++++++++++++++
> > > > > > > lib/librte_ether/rte_ethdev.c | 3 +-
> > > > > > > lib/librte_pmd_ixgbe/Makefile | 13 +-
> > > > > > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303
> > > > > ++++++++++++++++++++++
> > > > > > > lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++
> > > > > > > lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 44 +++-
> > > > > > > lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 +
> > > > > > > mk/rte.app.mk | 6 +
> > > > > > > 18 files changed, 1421 insertions(+), 27 deletions(-)
> > > > > > > create mode 100644 lib/librte_bifurc/Makefile
> > > > > > > create mode 100644 lib/librte_bifurc/rte_bifurc.c
> > > > > > > create mode 100644 lib/librte_bifurc/rte_bifurc.h
> > > > > > > create mode 100644
> > lib/librte_eal/common/include/rte_pci_bifurc.h
> > > > > > > create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
> > > > > > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> > > > > > > create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
> > > > > > >
> > > > > > > --
> > > > > > > 1.8.1.4
> > > > > > >
> > > > > > >
> > > > > > AIUI, the bifurcated driver hasn't yet been accepted upstream, has it?
> > > > > Given
> > > > > > that, I don't think its wise to pull this in yet ahead of the kernel work,
> > as
> > > > > > there may still be kernel side changes that the user space pmd will
> > have to
> > > > > > adapt to.
> > > > > > Neil
> > > > > >
> > > > > Hence the RFC nature of the patch, I believe. :-) Before the kernel part
> > hits
> > > > > the
> > > > > main kernel tree we can at least discuss the overall direction to be
> > taken for
> > > > > this driver because it's significantly different that any other HW driver.
> > > > >
> > > > > /Bruce
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
2014-11-25 14:11 [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver Cunming Liang
` (6 preceding siblings ...)
2014-11-25 14:23 ` [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver Neil Horman
@ 2015-04-09 3:43 ` 贾学涛
2015-04-20 9:53 ` Shelton Chia
7 siblings, 1 reply; 24+ messages in thread
From: 贾学涛 @ 2015-04-09 3:43 UTC (permalink / raw)
To: Cunming Liang, dev
Hi Cunming,
I applied the bifurc driver patches and tested them following your example. But
I can't receive packets with testpmd and l2fwd.
Kernel stack can receive packets from 10.0.0.2 before "ethtool -N
XGE4.1 flow-type ip4 src-ip 10.0.0.2 action 12". After "ethtool -N XGE4.1
flow-type ip4 src-ip 10.0.0.2 action 12", kernel stack can't receive
packets from 10.0.0.2, but testpmd and l2fwd cannot receive any packets
either.
Queues 0-11 are used by the kernel and queue 12 is used by the bifurc driver.
How can I make it work?
2014-11-25 22:11 GMT+08:00 Cunming Liang <cunming.liang@intel.com>:
>
> This is a RFC patch set to support "bifurcated driver" in DPDK.
>
>
> What is "bifurcated driver"?
> ===========================
>
> The "bifurcated driver" stands for the kernel NIC driver that supports:
>
> 1. on-demand rx/tx queue pairs split-off and assignment to user space
>
> 2. direct NIC resource(e.g. rx/tx queue registers) access from user space
>
> 3. distributing packets to kernel or user space rx queues by
> NIC's flow director according to the filter rules
>
> Here's the kernel patch set to support.
> http://comments.gmane.org/gmane.linux.network/333615
>
>
> Usage scenario
> =================
>
> It's well accepted by industry to use DPDK to process fast path packets in
> user space in a high performance fashion, meanwhile processing slow path
> control packets in kernel space is still needed as those packets usually
> rely on in_kernel TCP/IP stacks and/or socket programming interface.
>
> KNI(Kernel NIC Interface) mechanism in DPDK is designed to meet this
> requirement, with below limitation:
>
> 1) Software classifies packets and distributes them to kernel via DPDK
> software rings, at the cost of significant CPU cycles and memory
> bandwidth.
>
> 2) Memory copy packets between kernel' socket buffer and mbuf brings
> significant negative performance impact to KNI performance.
>
> The bifurcated driver provides an alternative approach that not only
> offloads
> flow classification and distribution to NIC but also supports packet
> zero_copy.
>
> User can use standard ethtool to add filter rules to the NIC in order to
> distribute specific flows to the queues only accessed by kernel driver and
> stack, and add other rules to distribute packets to the queues assigned to
> user-space.
>
> For those rx/tx queue pairs that directly accessed from user space,
> DPDK takes over the packets rx/tx as well as corresponding DMA operation
> for high performance packet I/O.
>
>
> What's the impact and change to DPDK
> ======================================
>
> DPDK usually binds PCIe NIC devices by leveraging kernel' user space driver
> mechanism UIO or VFIO to map entire NIC' PCIe I/O space of NIC to user
> space.
> The bifurcated driver PMD talks to a NIC interface using raw socket APIs
> and
> only mmap() limited I/O space (e.g. certain 4K pages) for accessing
> involved
> rx/tx queue pairs. So the impact and changes mainly comes with below:
>
> - netdev
> DPDK needs to create a af_packet socket and bind it to a bifurcated
> netdev.
> The socket fd will be used to request 'queue pairs info',
> 'split/return queue pairs' and etc. The PCIe device ID, netdev MAC
> address,
> numa info are also from the netdev response.
>
> - PCIe device scan and driver probe
> netdev provides the PCIe device ID information. Refer to the device ID,
> the correct driver should be used. And for such netdev device, the
> creation
> of PCIe device is no longer from scan but the on-demand assignment.
>
> - PCIe BAR mapping
> "bifurcated driver" maps several pages for the queue pairs.
> Others BAR register space maps to a fake page. The BAR mapping go
> through
> mmap on sockfd. Which is a little different from what UIO/VFIO does.
>
> - PMD
> The PMD will no longer really initialize and configure NIC.
> Instead, it only takes care the queue pair setup, rx_burst and
> tx_burst.
>
> The patch uses eal '--vdev' parameter to assign netdev iface name and
> number of
> queue pairs. Here's an example of how to configure the bifurcated driver
> and
> run DPDK testpmd with bifurcated PMD.
>
> 1. Set promisc mode
> > ifconfig eth0 promisc
>
> 2. Turn on fdir
> > ethtool -K eth0 ntuple on
>
> 3. Setup a flow director rule to distribute packets with source ip
> 0.0.0.0 to rxq No.0
> > ethtool -N eth0 flow-type udp4 src-ip 0.0.0.0 action 0
>
> 4. Run testpmd on netdev 'eth0' with 1 queue pair.
> > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 \
> > --vdev=rte_bifurc,iface=eth0,qpairs=1 -- \
> > -i --rxfreet=32 --txfreet=32 --txrst=32
> Note:
> iface and qpairs arguments above specify the netdev interface name and
> number of qpairs that user space request from the "bifurcated driver"
> respectively.
>
> 5. Setup a flow director rule to distribute packets with source ip
> 1.1.1.1 to rxq No.32. This needs to be done after testpmd starts.
> > ethtool -N eth0 flow-type udp4 src-ip 1.1.1.1 action 32
>
> Below illustrates the detailed changes in this patch set.
>
> eal
> --------
> The first two patches are all about the eal API declaration and Linux
> version
> definition to support af_packet socket and verbs of bifurcated netdev.
> Those APIs include the verbs like open, bind, (un)map, split/return,
> map_umem.
> And other APIs like set_pci, get_ifinfo and get/put_devargs which help to
> generate pci device from bifurcated netdev and get basic netdev info.
>
> The third patch is used to allow probing driver on the PCIe VDEV created
> from
> a NIC interface driven by "bifurcated driver". It defines a new flag
> 'RTE_PCI_DRV_BIFURC' used for direct ring access PMD.
>
> librte_bifurc
> ---------------
> The library is used as a VDEV bus driver to scan '--vdev=rte_bifurc' VDEV
> from eal command-line. It generates the PCIe VDEV device ready for further
> driver probe. It maintains the bifurcated device information include
> sockfd,
> hwaddr, mtu, qpairs, iface_name. It's used for other direct ring access PMD
> to apply for bifurcated device info.
>
> direct ring access PMD
> -------------------------
> The patch provides direct ring access PMD for ixgbe. Comparing to the
> normal
> PMD ixgbe, it uses 'RTE_PCI_DRV_BIFURC' flag during self registration.
> It mostly reuses the existing PMD ops to avoid re-implementing everything
> from scratch. And it also modifies the rx/tx_queue_setup to allow queue
> setup from any queue offset.
>
> Supported NIC driver
> ========================
>
> The "bifurcated driver" kernel patch only supports "ixgbe" driver at the
> moment,
> so this RFC patch also provides "ixgbe" PMD via direct-mapped rings as
> sample.
> The support for 40GE(i40e) will be added in the future.
>
> In addition, for those multi-queues enabled NIC with flow director
> capability
> to perform packet classification and distribution, there's no special
> technical gap to provide bifurcated driver approach support.
>
> Limitation
> ============
>
> By using "bifurcated driver", user space only takes over the DMA operation.
> For those NIC configure setting, it's out of control from user space PMD.
> All the NIC setting including add/del filter rules need to be done by
> standard Linux network tools(e.g. ethtool).
> So the feature support really depend on how much are supported by ethtool.
>
>
> Any questions, comments and feedback are welcome.
>
>
> -END-
>
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
>
> *** BLURB HERE ***
>
> Cunming Liang (6):
> eal: common direct ring access API
> eal: direct ring access support by linux af_packet
> pci: allow VDEV as pci device during device driver probe
> bifurc: add driver to scan bifurcated netdev
> ixgbe: rx/tx queue stop bug fix
> ixgbe: PMD for bifurc ixgbe net device
>
> config/common_linuxapp | 5 +
> lib/Makefile | 1 +
> lib/librte_bifurc/Makefile | 58 +++++
> lib/librte_bifurc/rte_bifurc.c | 284 +++++++++++++++++++++
> lib/librte_bifurc/rte_bifurc.h | 90 +++++++
> lib/librte_eal/common/Makefile | 5 +
> lib/librte_eal/common/include/rte_pci.h | 4 +
> lib/librte_eal/common/include/rte_pci_bifurc.h | 186 ++++++++++++++
> lib/librte_eal/linuxapp/eal/Makefile | 1 +
> lib/librte_eal/linuxapp/eal/eal_pci.c | 42 ++--
> lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c | 336
> +++++++++++++++++++++++++
> lib/librte_ether/rte_ethdev.c | 3 +-
> lib/librte_pmd_ixgbe/Makefile | 13 +-
> lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303
> ++++++++++++++++++++++
> lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 44 +++-
> lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 +
> mk/rte.app.mk | 6 +
> 18 files changed, 1421 insertions(+), 27 deletions(-)
> create mode 100644 lib/librte_bifurc/Makefile
> create mode 100644 lib/librte_bifurc/rte_bifurc.c
> create mode 100644 lib/librte_bifurc/rte_bifurc.h
> create mode 100644 lib/librte_eal/common/include/rte_pci_bifurc.h
> create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
> create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
> create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
>
> --
> 1.8.1.4
>
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver
2015-04-09 3:43 ` 贾学涛
@ 2015-04-20 9:53 ` Shelton Chia
0 siblings, 0 replies; 24+ messages in thread
From: Shelton Chia @ 2015-04-20 9:53 UTC (permalink / raw)
To: Cunming Liang, dev
Hi,
I can receive packets when I mmap all of the PCI memory, not only the rx and tx
descriptors.
2015-04-09 11:43 GMT+08:00 贾学涛 <jiaxt@sinogrid.com>:
> Hi Cunming,
> I applied the bifurc driver patches and tested them following your example.
> But I can't receive packets with testpmd and l2fwd.
> Kernel stack can receive packets from 10.0.0.2 before "ethtool -N
> XGE4.1 flow-type ip4 src-ip 10.0.0.2 action 12". After "ethtool -N XGE4.1
> flow-type ip4 src-ip 10.0.0.2 action 12", kernel stack can't receive
> packets from 10.0.0.2, but testpmd and l2fwd cannot receive any packets
> either.
> Queues 0-11 are used by the kernel and queue 12 is used by the bifurc driver.
> How can I make it work?
>
> 2014-11-25 22:11 GMT+08:00 Cunming Liang <cunming.liang@intel.com>:
>
>>
>> This is a RFC patch set to support "bifurcated driver" in DPDK.
>>
>>
>> What is "bifurcated driver"?
>> ===========================
>>
>> The "bifurcated driver" stands for the kernel NIC driver that supports:
>>
>> 1. on-demand rx/tx queue pairs split-off and assignment to user space
>>
>> 2. direct NIC resource(e.g. rx/tx queue registers) access from user space
>>
>> 3. distributing packets to kernel or user space rx queues by
>> NIC's flow director according to the filter rules
>>
>> Here's the kernel patch set to support.
>> http://comments.gmane.org/gmane.linux.network/333615
>>
>>
>> Usage scenario
>> =================
>>
>> It's well accepted by industry to use DPDK to process fast path packets in
>> user space in a high performance fashion, meanwhile processing slow path
>> control packets in kernel space is still needed as those packets usually
>> rely on in_kernel TCP/IP stacks and/or socket programming interface.
>>
>> KNI(Kernel NIC Interface) mechanism in DPDK is designed to meet this
>> requirement, with below limitation:
>>
>> 1) Software classifies packets and distributes them to kernel via DPDK
>> software rings, at the cost of significant CPU cycles and memory
>> bandwidth.
>>
>> 2) Memory copy packets between kernel' socket buffer and mbuf brings
>> significant negative performance impact to KNI performance.
>>
>> The bifurcated driver provides an alternative approach that not only
>> offloads
>> flow classification and distribution to NIC but also supports packet
>> zero_copy.
>>
>> User can use standard ethtool to add filter rules to the NIC in order to
>> distribute specific flows to the queues only accessed by kernel driver and
>> stack, and add other rules to distribute packets to the queues assigned to
>> user-space.
>>
>> For those rx/tx queue pairs that directly accessed from user space,
>> DPDK takes over the packets rx/tx as well as corresponding DMA operation
>> for high performance packet I/O.
>>
>>
>> What's the impact and change to DPDK
>> ======================================
>>
>> DPDK usually binds PCIe NIC devices by leveraging kernel' user space
>> driver
>> mechanism UIO or VFIO to map entire NIC' PCIe I/O space of NIC to user
>> space.
>> The bifurcated driver PMD talks to a NIC interface using raw socket APIs
>> and
>> only mmap() limited I/O space (e.g. certain 4K pages) for accessing
>> involved
>> rx/tx queue pairs. So the impact and changes mainly comes with below:
>>
>> - netdev
>> DPDK needs to create a af_packet socket and bind it to a bifurcated
>> netdev.
>> The socket fd will be used to request 'queue pairs info',
>> 'split/return queue pairs' and etc. The PCIe device ID, netdev MAC
>> address,
>> numa info are also from the netdev response.
>>
>> - PCIe device scan and driver probe
>> netdev provides the PCIe device ID information. Refer to the device
>> ID,
>> the correct driver should be used. And for such netdev device, the
>> creation
>> of PCIe device is no longer from scan but the on-demand assignment.
>>
>> - PCIe BAR mapping
>> "bifurcated driver" maps several pages for the queue pairs.
>> Others BAR register space maps to a fake page. The BAR mapping go
>> through
>> mmap on sockfd. Which is a little different from what UIO/VFIO does.
>>
>> - PMD
>> The PMD will no longer really initialize and configure NIC.
>> Instead, it only takes care the queue pair setup, rx_burst and
>> tx_burst.
>>
>> The patch uses eal '--vdev' parameter to assign netdev iface name and
>> number of
>> queue pairs. Here's an example of how to configure the bifurcated
>> driver and
>> run DPDK testpmd with bifurcated PMD.
>>
>> 1. Set promisc mode
>> > ifconfig eth0 promisc
>>
>> 2. Turn on fdir
>> > ethtool -K eth0 ntuple on
>>
>> 3. Setup a flow director rule to distribute packets with source ip
>> 0.0.0.0 to rxq No.0
>> > ethtool -N eth0 flow-type udp4 src-ip 0.0.0.0 action 0
>>
>> 4. Run testpmd on netdev 'eth0' with 1 queue pair.
>> > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 \
>> > --vdev=rte_bifurc,iface=eth0,qpairs=1 -- \
>> > -i --rxfreet=32 --txfreet=32 --txrst=32
>> Note:
>> iface and qpairs arguments above specify the netdev interface name and
>> number of qpairs that user space request from the "bifurcated driver"
>> respectively.
>>
>> 5. Setup a flow director rule to distribute packets with source ip
>> 1.1.1.1 to rxq No.32. This needs to be done after testpmd starts.
>> > ethtool -N eth0 flow-type udp4 src-ip 1.1.1.1 action 32
>>
>> Below illustrates the detailed changes in this patch set.
>>
>> eal
>> --------
>> The first two patches are all about the eal API declaration and Linux
>> version
>> definition to support af_packet socket and verbs of bifurcated netdev.
>> Those APIs include the verbs like open, bind, (un)map, split/return,
>> map_umem.
>> And other APIs like set_pci, get_ifinfo and get/put_devargs which help to
>> generate pci device from bifurcated netdev and get basic netdev info.
>>
>> The third patch is used to allow probing driver on the PCIe VDEV created
>> from
>> a NIC interface driven by "bifurcated driver". It defines a new flag
>> 'RTE_PCI_DRV_BIFURC' used for direct ring access PMD.
>>
>> librte_bifurc
>> ---------------
>> The library is used as a VDEV bus driver to scan '--vdev=rte_bifurc' VDEV
>> from eal command-line. It generates the PCIe VDEV device ready for further
>> driver probe. It maintains the bifurcated device information include
>> sockfd,
>> hwaddr, mtu, qpairs, iface_name. It's used for other direct ring access
>> PMD
>> to apply for bifurcated device info.
>>
>> direct ring access PMD
>> -------------------------
>> The patch provides direct ring access PMD for ixgbe. Comparing to the
>> normal
>> PMD ixgbe, it uses 'RTE_PCI_DRV_BIFURC' flag during self registration.
>> It mostly reuses the existing PMD ops to avoid re-implementing everything
>> from scratch. And it also modifies the rx/tx_queue_setup to allow queue
>> setup from any queue offset.
>>
>> Supported NIC driver
>> ========================
>>
>> The "bifurcated driver" kernel patch only supports "ixgbe" driver at the
>> moment,
>> so this RFC patch also provides "ixgbe" PMD via direct-mapped rings as
>> sample.
>> The support for 40GE(i40e) will be added in the future.
>>
>> In addition, for those multi-queues enabled NIC with flow director
>> capability
>> to perform packet classification and distribution, there's no special
>> technical gap to provide bifurcated driver approach support.
>>
>> Limitation
>> ============
>>
>> By using "bifurcated driver", user space only takes over the DMA
>> operation.
>> For those NIC configure setting, it's out of control from user space PMD.
>> All the NIC setting including add/del filter rules need to be done by
>> standard Linux network tools(e.g. ethtool).
>> So the feature support really depend on how much are supported by ethtool.
>>
>>
>> Any questions, comments and feedback are welcome.
>>
>>
>> -END-
>>
>> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
>> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
>>
>> *** BLURB HERE ***
>>
>> Cunming Liang (6):
>> eal: common direct ring access API
>> eal: direct ring access support by linux af_packet
>> pci: allow VDEV as pci device during device driver probe
>> bifurc: add driver to scan bifurcated netdev
>> ixgbe: rx/tx queue stop bug fix
>> ixgbe: PMD for bifurc ixgbe net device
>>
>> config/common_linuxapp | 5 +
>> lib/Makefile | 1 +
>> lib/librte_bifurc/Makefile | 58 +++++
>> lib/librte_bifurc/rte_bifurc.c | 284
>> +++++++++++++++++++++
>> lib/librte_bifurc/rte_bifurc.h | 90 +++++++
>> lib/librte_eal/common/Makefile | 5 +
>> lib/librte_eal/common/include/rte_pci.h | 4 +
>> lib/librte_eal/common/include/rte_pci_bifurc.h | 186 ++++++++++++++
>> lib/librte_eal/linuxapp/eal/Makefile | 1 +
>> lib/librte_eal/linuxapp/eal/eal_pci.c | 42 ++--
>> lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c | 336
>> +++++++++++++++++++++++++
>> lib/librte_ether/rte_ethdev.c | 3 +-
>> lib/librte_pmd_ixgbe/Makefile | 13 +-
>> lib/librte_pmd_ixgbe/ixgbe_bifurcate.c | 303
>> ++++++++++++++++++++++
>> lib/librte_pmd_ixgbe/ixgbe_bifurcate.h | 57 +++++
>> lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 44 +++-
>> lib/librte_pmd_ixgbe/ixgbe_rxtx.h | 10 +
>> mk/rte.app.mk | 6 +
>> 18 files changed, 1421 insertions(+), 27 deletions(-)
>> create mode 100644 lib/librte_bifurc/Makefile
>> create mode 100644 lib/librte_bifurc/rte_bifurc.c
>> create mode 100644 lib/librte_bifurc/rte_bifurc.h
>> create mode 100644 lib/librte_eal/common/include/rte_pci_bifurc.h
>> create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
>> create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.c
>> create mode 100644 lib/librte_pmd_ixgbe/ixgbe_bifurcate.h
>>
>> --
>> 1.8.1.4
>>
>>
>
^ permalink raw reply [flat|nested] 24+ messages in thread