From: Nelio Laranjeiro
To: dev@dpdk.org
Cc: adrien.mazarguil@6wind.com
Date: Wed, 2 Aug 2017 16:10:24 +0200
Message-Id: <8665c7e875ddcdd954322a8b42fa36d22939069a.1501681927.git.nelio.laranjeiro@6wind.com>
Subject: [dpdk-dev] [PATCH v1 08/21] net/mlx5: separate DPDK from Verbs Rx queue objects

Signed-off-by: Nelio Laranjeiro
---
 drivers/net/mlx5/mlx5.c      |   3 +
 drivers/net/mlx5/mlx5.h      |   2 +-
 drivers/net/mlx5/mlx5_flow.c |  97 +++-----
 drivers/net/mlx5/mlx5_rxq.c  | 564 ++++++++++++++++++++++++++-----------------
 drivers/net/mlx5/mlx5_rxtx.h |  26 +-
 drivers/net/mlx5/mlx5_vlan.c |   2 +-
 6 files changed, 401 insertions(+), 293 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 0d8ca52..c158d8e 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -200,6 +200,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	}
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
+	i = mlx5_priv_rxq_ibv_verify(priv);
+	if (i)
+		WARN("%p: some Verbs Rx queue still remain", (void*)priv);
 	i = priv_flow_verify(priv);
 	if (i)
 		WARN("%p: some flows still remain", (void*)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1ae5f59..228fd34 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -146,6 +146,7 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
+	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
@@ -287,7 +288,6 @@ int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
 int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
 int priv_flow_start(struct priv *);
 void priv_flow_stop(struct priv *);
-int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
 int priv_flow_verify(struct priv *);
 
 /* mlx5_mr.c */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index bcbb984..9ed8d05 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -95,11 +95,11 @@ struct rte_flow {
 	struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
 	struct ibv_exp_wq *wq; /**< Verbs work queue. */
 	struct ibv_cq *cq; /**< Verbs completion queue. */
-	uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint32_t drop:1; /**< Drop queue. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
-	struct mlx5_rxq_data *rxqs[]; /**< Pointer to the queues array. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< List of queues. */
+	uint16_t queues_n; /**< Number of queues in the list. */
 };
 
 /** Static initializer for items.
*/ @@ -1097,23 +1097,21 @@ priv_flow_create_action_queue(struct priv *priv, assert(priv->pd); assert(priv->ctx); assert(!flow->actions.drop); - rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) + - sizeof(*rte_flow->rxqs) * flow->actions.queues_n, - 0); + rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0); if (!rte_flow) { rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL, "cannot allocate flow memory"); return NULL; } for (i = 0; i < flow->actions.queues_n; ++i) { - struct mlx5_rxq_ctrl *rxq; + struct mlx5_rxq_ibv *rxq = + mlx5_priv_rxq_ibv_get(priv, flow->actions.queues[i]); - rxq = container_of((*priv->rxqs)[flow->actions.queues[i]], - struct mlx5_rxq_ctrl, rxq); wqs[i] = rxq->wq; - rte_flow->rxqs[i] = &rxq->rxq; - ++rte_flow->rxqs_n; - rxq->rxq.mark |= flow->actions.mark; + rte_flow->queues[i] = flow->actions.queues[i]; + ++rte_flow->queues_n; + (*priv->rxqs)[flow->actions.queues[i]]->mark |= + flow->actions.mark; } /* finalise indirection table. */ for (j = 0; i < wqs_n; ++i, ++j) { @@ -1294,6 +1292,8 @@ static void priv_flow_destroy(struct priv *priv, struct rte_flow *flow) { + unsigned int i; + TAILQ_REMOVE(&priv->flows, flow, next); if (flow->ibv_flow) claim_zero(ibv_exp_destroy_flow(flow->ibv_flow)); @@ -1303,37 +1303,33 @@ priv_flow_destroy(struct priv *priv, claim_zero(ibv_destroy_qp(flow->qp)); if (flow->ind_table) claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table)); - if (flow->mark) { + for (i = 0; i != flow->queues_n; ++i) { struct rte_flow *tmp; - struct mlx5_rxq_data *rxq; - uint32_t mark_n = 0; - uint32_t queue_n; + struct mlx5_rxq_data *rxq = (*priv->rxqs)[flow->queues[i]]; + struct mlx5_rxq_ctrl *rxq_ctrl = + container_of(rxq, struct mlx5_rxq_ctrl, rxq); /* * To remove the mark from the queue, the queue must not be * present in any other marked flow (RSS or not). */ - for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) { - rxq = flow->rxqs[queue_n]; - for (tmp = TAILQ_FIRST(&priv->flows); - tmp; - tmp = TAILQ_NEXT(tmp, next)) { - uint32_t tqueue_n; + if (flow->mark) { + int mark = 0; + + TAILQ_FOREACH(tmp, &priv->flows, next) { + unsigned int j; if (tmp->drop) continue; - for (tqueue_n = 0; - tqueue_n < tmp->rxqs_n; - ++tqueue_n) { - struct mlx5_rxq_data *trxq; - - trxq = tmp->rxqs[tqueue_n]; - if (rxq == trxq) - ++mark_n; - } + if (!tmp->mark) + continue; + for (j = 0; (j != tmp->queues_n) && !mark; j++) + if (tmp->queues[j] == flow->queues[i]) + mark = 1; } - rxq->mark = !!mark_n; + rxq->mark = mark; } + mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv); } free: rte_free(flow->ibv_attr); @@ -1532,8 +1528,8 @@ priv_flow_stop(struct priv *priv) if (flow->mark) { unsigned int n; - for (n = 0; n < flow->rxqs_n; ++n) - flow->rxqs[n]->mark = 0; + for (n = 0; n < flow->queues_n; ++n) + (*priv->rxqs)[flow->queues[n]]->mark = 0; } DEBUG("Flow %p removed", (void *)flow); } @@ -1575,39 +1571,8 @@ priv_flow_start(struct priv *priv) if (flow->mark) { unsigned int n; - for (n = 0; n < flow->rxqs_n; ++n) - flow->rxqs[n]->mark = 1; - } - } - return 0; -} - -/** - * Verify if the Rx queue is used in a flow. - * - * @param priv - * Pointer to private structure. - * @param rxq - * Pointer to the queue to search. - * - * @return - * Nonzero if the queue is used by a flow. 
- */ -int -priv_flow_rxq_in_use(struct priv *priv, struct mlx5_rxq_data *rxq) -{ - struct rte_flow *flow; - - for (flow = TAILQ_FIRST(&priv->flows); - flow; - flow = TAILQ_NEXT(flow, next)) { - unsigned int n; - - if (flow->drop) - continue; - for (n = 0; n < flow->rxqs_n; ++n) { - if (flow->rxqs[n] == rxq) - return 1; + for (n = 0; n < flow->queues_n; ++n) + (*priv->rxqs)[flow->queues[n]]->mark = 1; } } return 0; diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index 80cfd96..1663734 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -378,7 +378,7 @@ priv_create_hash_rxqs(struct priv *priv) rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]], struct mlx5_rxq_ctrl, rxq); - wqs[i] = rxq_ctrl->wq; + wqs[i] = rxq_ctrl->ibv->wq; } /* Get number of hash RX queues to configure. */ for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i) @@ -647,8 +647,6 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n) /* Iterate on segments. */ for (i = 0; (i != elts_n); ++i) { struct rte_mbuf *buf; - volatile struct mlx5_wqe_data_seg *scat = - &(*rxq_ctrl->rxq.wqes)[i]; buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp); if (buf == NULL) { @@ -669,13 +667,6 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n) DATA_LEN(buf) = rte_pktmbuf_tailroom(buf); PKT_LEN(buf) = DATA_LEN(buf); NB_SEGS(buf) = 1; - /* scat->addr must be able to store a pointer. */ - assert(sizeof(scat->addr) >= sizeof(uintptr_t)); - *scat = (struct mlx5_wqe_data_seg){ - .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)), - .byte_count = htonl(DATA_LEN(buf)), - .lkey = rxq_ctrl->mr->lkey, - }; (*rxq_ctrl->rxq.elts)[i] = buf; } if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) { @@ -761,65 +752,12 @@ mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl) { DEBUG("cleaning up %p", (void *)rxq_ctrl); rxq_free_elts(rxq_ctrl); - if (rxq_ctrl->wq != NULL) - claim_zero(ibv_exp_destroy_wq(rxq_ctrl->wq)); - if (rxq_ctrl->cq != NULL) - claim_zero(ibv_destroy_cq(rxq_ctrl->cq)); - if (rxq_ctrl->channel != NULL) - claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel)); - if (rxq_ctrl->mr != NULL) - priv_mr_release(rxq_ctrl->priv, rxq_ctrl->mr); + if (rxq_ctrl->ibv) + mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv); memset(rxq_ctrl, 0, sizeof(*rxq_ctrl)); } /** - * Initialize RX queue. - * - * @param tmpl - * Pointer to RX queue control template. - * - * @return - * 0 on success, errno value on failure. - */ -static inline int -rxq_setup(struct mlx5_rxq_ctrl *tmpl) -{ - struct ibv_cq *ibcq = tmpl->cq; - struct ibv_mlx5_cq_info cq_info; - struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq); - const uint16_t desc_n = - (1 << tmpl->rxq.elts_n) + tmpl->priv->rx_vec_en * - MLX5_VPMD_DESCS_PER_LOOP; - struct rte_mbuf *(*elts)[desc_n] = - rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket); - if (ibv_mlx5_exp_get_cq_info(ibcq, &cq_info)) { - ERROR("Unable to query CQ info. 
check your OFED."); - return ENOTSUP; - } - if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { - ERROR("Wrong MLX5_CQE_SIZE environment variable value: " - "it should be set to %u", RTE_CACHE_LINE_SIZE); - return EINVAL; - } - if (elts == NULL) - return ENOMEM; - tmpl->rxq.rq_db = rwq->rq.db; - tmpl->rxq.cqe_n = log2above(cq_info.cqe_cnt); - tmpl->rxq.cq_ci = 0; - tmpl->rxq.rq_ci = 0; - tmpl->rxq.rq_pi = 0; - tmpl->rxq.cq_db = cq_info.dbrec; - tmpl->rxq.wqes = - (volatile struct mlx5_wqe_data_seg (*)[]) - (uintptr_t)rwq->rq.buff; - tmpl->rxq.cqes = - (volatile struct mlx5_cqe (*)[]) - (uintptr_t)cq_info.buf; - tmpl->rxq.elts = elts; - return 0; -} - -/** * Configure a RX queue. * * @param dev @@ -848,25 +786,24 @@ mlx5_rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl, .priv = priv, .socket = socket, .rxq = { + .elts = rte_calloc_socket("RXQ", 1, + desc * + sizeof(struct rte_mbuf *), 0, + socket), .elts_n = log2above(desc), .mp = mp, .rss_hash = priv->rxqs_n > 1, }, }; - struct ibv_exp_wq_attr mod; - union { - struct ibv_exp_cq_init_attr cq; - struct ibv_exp_wq_init_attr wq; - struct ibv_exp_cq_attr cq_attr; - } attr; unsigned int mb_len = rte_pktmbuf_data_room_size(mp); - unsigned int cqe_n = desc - 1; const uint16_t desc_n = desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP; struct rte_mbuf *(*elts)[desc_n] = NULL; int ret = 0; (void)conf; /* Thresholds configuration (ignored). */ + if (dev->data->dev_conf.intr_conf.rxq) + tmpl.memory_channel = 1; /* Enable scattered packets support for this queue if necessary. */ assert(mb_len >= RTE_PKTMBUF_HEADROOM); if (dev->data->dev_conf.rxmode.max_rx_pkt_len <= @@ -919,78 +856,13 @@ mlx5_rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl, if (priv->hw_csum_l2tun) tmpl.rxq.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum; - /* Use the entire RX mempool as the memory region. */ - tmpl.mr = priv_mr_get(priv, mp); - if (tmpl.mr == NULL) { - tmpl.mr = priv_mr_new(priv, mp); - if (tmpl.mr == NULL) { - ret = EINVAL; - ERROR("%p: MR creation failure: %s", - (void *)dev, strerror(ret)); - goto error; - } - } - if (dev->data->dev_conf.intr_conf.rxq) { - tmpl.channel = ibv_create_comp_channel(priv->ctx); - if (tmpl.channel == NULL) { - ret = ENOMEM; - ERROR("%p: Rx interrupt completion channel creation" - " failure: %s", - (void *)dev, strerror(ret)); - goto error; - } - } - attr.cq = (struct ibv_exp_cq_init_attr){ - .comp_mask = 0, - }; - if (priv->cqe_comp) { - attr.cq.comp_mask |= IBV_EXP_CQ_INIT_ATTR_FLAGS; - attr.cq.flags |= IBV_EXP_CQ_COMPRESSED_CQE; - /* - * For vectorized Rx, it must not be doubled in order to - * make cq_ci and rq_ci aligned. - */ - if (rxq_check_vec_support(&tmpl.rxq) < 0) - cqe_n = (desc * 2) - 1; /* Double the number of CQEs. */ - } - tmpl.cq = ibv_exp_create_cq(priv->ctx, cqe_n, NULL, tmpl.channel, 0, - &attr.cq); - if (tmpl.cq == NULL) { - ret = ENOMEM; - ERROR("%p: CQ creation failure: %s", - (void *)dev, strerror(ret)); - goto error; - } - DEBUG("priv->device_attr.max_qp_wr is %d", - priv->device_attr.max_qp_wr); - DEBUG("priv->device_attr.max_sge is %d", - priv->device_attr.max_sge); /* Configure VLAN stripping. */ tmpl.rxq.vlan_strip = (priv->hw_vlan_strip && !!dev->data->dev_conf.rxmode.hw_vlan_strip); - attr.wq = (struct ibv_exp_wq_init_attr){ - .wq_context = NULL, /* Could be useful in the future. */ - .wq_type = IBV_EXP_WQT_RQ, - /* Max number of outstanding WRs. */ - .max_recv_wr = desc >> tmpl.rxq.sges_n, - /* Max number of scatter/gather elements in a WR. 
*/ - .max_recv_sge = 1 << tmpl.rxq.sges_n, - .pd = priv->pd, - .cq = tmpl.cq, - .comp_mask = - IBV_EXP_CREATE_WQ_VLAN_OFFLOADS | - 0, - .vlan_offloads = (tmpl.rxq.vlan_strip ? - IBV_EXP_RECEIVE_WQ_CVLAN_STRIP : - 0), - }; /* By default, FCS (CRC) is stripped by hardware. */ if (dev->data->dev_conf.rxmode.hw_strip_crc) { tmpl.rxq.crc_present = 0; } else if (priv->hw_fcs_strip) { - /* Ask HW/Verbs to leave CRC in place when supported. */ - attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_SCATTER_FCS; - attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS; tmpl.rxq.crc_present = 1; } else { WARN("%p: CRC stripping has been disabled but will still" @@ -1004,59 +876,9 @@ mlx5_rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl, (void *)dev, tmpl.rxq.crc_present ? "disabled" : "enabled", tmpl.rxq.crc_present << 2); - if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING")) - ; /* Nothing else to do. */ - else if (priv->hw_padding) { - INFO("%p: enabling packet padding on queue %p", - (void *)dev, (void *)rxq_ctrl); - attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_RX_END_PADDING; - attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS; - } else - WARN("%p: packet padding has been requested but is not" - " supported, make sure MLNX_OFED and firmware are" - " up to date", - (void *)dev); - - tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq); - if (tmpl.wq == NULL) { - ret = (errno ? errno : EINVAL); - ERROR("%p: WQ creation failure: %s", - (void *)dev, strerror(ret)); - goto error; - } - /* - * Make sure number of WRs*SGEs match expectations since a queue - * cannot allocate more than "desc" buffers. - */ - if (((int)attr.wq.max_recv_wr != (desc >> tmpl.rxq.sges_n)) || - ((int)attr.wq.max_recv_sge != (1 << tmpl.rxq.sges_n))) { - ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs", - (void *)dev, - (desc >> tmpl.rxq.sges_n), (1 << tmpl.rxq.sges_n), - attr.wq.max_recv_wr, attr.wq.max_recv_sge); - ret = EINVAL; - goto error; - } /* Save port ID. */ tmpl.rxq.port_id = dev->data->port_id; DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id); - /* Change queue state to ready. */ - mod = (struct ibv_exp_wq_attr){ - .attr_mask = IBV_EXP_WQ_ATTR_STATE, - .wq_state = IBV_EXP_WQS_RDY, - }; - ret = ibv_exp_modify_wq(tmpl.wq, &mod); - if (ret) { - ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s", - (void *)dev, strerror(ret)); - goto error; - } - ret = rxq_setup(&tmpl); - if (ret) { - ERROR("%p: cannot initialize RX queue structure: %s", - (void *)dev, strerror(ret)); - goto error; - } ret = rxq_alloc_elts(&tmpl, desc); if (ret) { ERROR("%p: RXQ allocation failed: %s", @@ -1075,17 +897,12 @@ mlx5_rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl, rte_free(tmpl.rxq.elts); tmpl.rxq.elts = elts; *rxq_ctrl = tmpl; - /* Update doorbell counter. 
*/ - rxq_ctrl->rxq.rq_ci = desc >> rxq_ctrl->rxq.sges_n; - rte_wmb(); - *rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci); DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl); assert(ret == 0); return 0; error: - elts = tmpl.rxq.elts; + rte_free(tmpl.rxq.elts); mlx5_rxq_cleanup(&tmpl); - rte_free(elts); assert(ret > 0); return ret; } @@ -1175,14 +992,20 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, } } ret = mlx5_rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp); - if (ret) + if (ret) { rte_free(rxq_ctrl); - else { - rxq_ctrl->rxq.stats.idx = idx; - DEBUG("%p: adding RX queue %p to list", - (void *)dev, (void *)rxq_ctrl); - (*priv->rxqs)[idx] = &rxq_ctrl->rxq; + goto out; } + rxq_ctrl->rxq.stats.idx = idx; + DEBUG("%p: adding RX queue %p to list", + (void *)dev, (void *)rxq_ctrl); + (*priv->rxqs)[idx] = &rxq_ctrl->rxq; + rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, idx); + if (!rxq_ctrl->ibv) { + ret = EAGAIN; + goto out; + } +out: priv_unlock(priv); return -ret; } @@ -1209,7 +1032,7 @@ mlx5_rx_queue_release(void *dpdk_rxq) rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); priv = rxq_ctrl->priv; priv_lock(priv); - if (priv_flow_rxq_in_use(priv, rxq)) + if (!mlx5_priv_rxq_ibv_releasable(priv, rxq_ctrl->ibv)) rte_panic("Rx queue %p is still used by a flow and cannot be" " removed\n", (void *)rxq_ctrl); for (i = 0; (i != priv->rxqs_n); ++i) @@ -1253,15 +1076,14 @@ priv_rx_intr_vec_enable(struct priv *priv) } intr_handle->type = RTE_INTR_HANDLE_EXT; for (i = 0; i != n; ++i) { - struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; - struct mlx5_rxq_ctrl *rxq_ctrl = - container_of(rxq, struct mlx5_rxq_ctrl, rxq); + /* This rxq ibv must not be released in this function. */ + struct mlx5_rxq_ibv *rxq = mlx5_priv_rxq_ibv_get(priv, i); int fd; int flags; int rc; /* Skip queues that cannot request interrupts. */ - if (!rxq || !rxq_ctrl->channel) { + if (!rxq || !rxq->channel) { /* Use invalid intr_vec[] index to disable entry. */ intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + @@ -1275,7 +1097,7 @@ priv_rx_intr_vec_enable(struct priv *priv) priv_rx_intr_vec_disable(priv); return -1; } - fd = rxq_ctrl->channel->fd; + fd = rxq->channel->fd; flags = fcntl(fd, F_GETFL); rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK); if (rc < 0) { @@ -1305,7 +1127,27 @@ void priv_rx_intr_vec_disable(struct priv *priv) { struct rte_intr_handle *intr_handle = priv->dev->intr_handle; + unsigned int i; + unsigned int rxqs_n = priv->rxqs_n; + unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); + if (!priv->dev->data->dev_conf.intr_conf.rxq) + return; + for (i = 0; i != n; ++i) { + struct mlx5_rxq_ctrl *ctrl; + struct mlx5_rxq_data *rxq; + + if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET + + RTE_MAX_RXTX_INTR_VEC_ID) + continue; + /** + * Need to access directly the queue to release the reference + * kept in priv_rx_intr_vec_enable(). 
+ */ + rxq = (*priv->rxqs)[i]; + ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); + mlx5_priv_rxq_ibv_release(priv, ctrl->ibv); + } rte_intr_free_epoll_fd(intr_handle); free(intr_handle->intr_vec); intr_handle->nb_efd = 0; @@ -1329,19 +1171,19 @@ int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id) { struct priv *priv = mlx5_get_priv(dev); - struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id]; - struct mlx5_rxq_ctrl *rxq_ctrl = - container_of(rxq, struct mlx5_rxq_ctrl, rxq); + struct mlx5_rxq_ibv *rxq = mlx5_priv_rxq_ibv_get(priv, rx_queue_id); int ret; - if (!rxq || !rxq_ctrl->channel) { + if (!rxq || !rxq->channel) { ret = EINVAL; } else { - ibv_mlx5_exp_update_cq_ci(rxq_ctrl->cq, rxq->cq_ci); - ret = ibv_req_notify_cq(rxq_ctrl->cq, 0); + ibv_mlx5_exp_update_cq_ci(rxq->cq, + (*priv->rxqs)[rx_queue_id]->cq_ci); + ret = ibv_req_notify_cq(rxq->cq, 0); } if (ret) WARN("unable to arm interrupt on rx queue %d", rx_queue_id); + mlx5_priv_rxq_ibv_release(priv, rxq); return -ret; } @@ -1360,26 +1202,312 @@ int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) { struct priv *priv = mlx5_get_priv(dev); - struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id]; - struct mlx5_rxq_ctrl *rxq_ctrl = - container_of(rxq, struct mlx5_rxq_ctrl, rxq); + struct mlx5_rxq_ibv *rxq = mlx5_priv_rxq_ibv_get(priv, rx_queue_id); struct ibv_cq *ev_cq; void *ev_ctx; int ret; - if (!rxq || !rxq_ctrl->channel) { + if (!rxq || !rxq->channel) { ret = EINVAL; } else { - ret = ibv_get_cq_event(rxq_ctrl->cq->channel, &ev_cq, &ev_ctx); - if (ret || ev_cq != rxq_ctrl->cq) + ret = ibv_get_cq_event(rxq->cq->channel, &ev_cq, &ev_ctx); + if (ret || ev_cq != rxq->cq) ret = EINVAL; } if (ret) WARN("unable to disable interrupt on rx queue %d", rx_queue_id); else - ibv_ack_cq_events(rxq_ctrl->cq, 1); + ibv_ack_cq_events(rxq->cq, 1); + mlx5_priv_rxq_ibv_release(priv, rxq); return -ret; } #endif /* HAVE_UPDATE_CQ_CI */ + +/** + * Create the Rx queue Verbs object. + * + * @param priv + * Pointer to private structure. + * @param idx + * Queue index in DPDK Rx queue array + * + * @return + * The Verbs object initialised if it can be created. + */ +struct mlx5_rxq_ibv* +mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) +{ + struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx]; + struct mlx5_rxq_ctrl *rxq_ctrl = + container_of(rxq, struct mlx5_rxq_ctrl, rxq); + struct ibv_exp_wq_attr mod; + union { + struct ibv_exp_cq_init_attr cq; + struct ibv_exp_wq_init_attr wq; + struct ibv_exp_cq_attr cq_attr; + } attr; + unsigned int cqe_n = (1 << rxq->elts_n) - 1; + struct mlx5_rxq_ibv *tmpl; + struct ibv_mlx5_cq_info cq_info; + struct mlx5_rwq *rwq; + unsigned int i; + int ret = 0; + + assert(!rxq_ctrl->ibv); + tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0, + rxq_ctrl->socket); + if (!tmpl) { + ERROR("%p: cannot allocate verbs ressources", + (void*)rxq_ctrl); + goto error; + } + /* Use the entire RX mempool as the memory region. 
*/ + tmpl->mr = priv_mr_get(priv, rxq->mp); + if (!tmpl->mr) { + tmpl->mr = priv_mr_new(priv, rxq->mp); + if (!tmpl->mr) { + ERROR("%p: MR creation failure", (void *)rxq_ctrl); + goto error; + } + } + if (rxq_ctrl->memory_channel) { + tmpl->channel = ibv_create_comp_channel(priv->ctx); + if (!tmpl->channel) { + ERROR("%p: Comp Channel creation failure", + (void *)rxq_ctrl); + goto error; + } + } + attr.cq = (struct ibv_exp_cq_init_attr){ + .comp_mask = 0, + }; + if (priv->cqe_comp) { + attr.cq.comp_mask |= IBV_EXP_CQ_INIT_ATTR_FLAGS; + attr.cq.flags |= IBV_EXP_CQ_COMPRESSED_CQE; + /* + * For vectorized Rx, it must not be doubled in order to + * make cq_ci and rq_ci aligned. + */ + if (rxq_check_vec_support(rxq) < 0) + cqe_n *= 2; + } + tmpl->cq = ibv_exp_create_cq(priv->ctx, cqe_n, NULL, tmpl->channel, 0, + &attr.cq); + if (tmpl->cq == NULL) { + ERROR("%p: CQ creation failure", (void *)rxq_ctrl); + goto error; + } + if (ibv_mlx5_exp_get_cq_info(tmpl->cq, &cq_info)) { + ERROR("Unable to query CQ info. check your OFED."); + goto error; + } + if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { + ERROR("Wrong MLX5_CQE_SIZE environment variable value: " + "it should be set to %u", RTE_CACHE_LINE_SIZE); + goto error; + } + DEBUG("priv->device_attr.max_qp_wr is %d", + priv->device_attr.max_qp_wr); + DEBUG("priv->device_attr.max_sge is %d", + priv->device_attr.max_sge); + attr.wq = (struct ibv_exp_wq_init_attr){ + .wq_context = NULL, /* Could be useful in the future. */ + .wq_type = IBV_EXP_WQT_RQ, + /* Max number of outstanding WRs. */ + .max_recv_wr = (1 << rxq->elts_n) >> rxq->sges_n, + /* Max number of scatter/gather elements in a WR. */ + .max_recv_sge = 1 << rxq->sges_n, + .pd = priv->pd, + .cq = tmpl->cq, + .comp_mask = + IBV_EXP_CREATE_WQ_VLAN_OFFLOADS | + 0, + .vlan_offloads = (rxq->vlan_strip ? + IBV_EXP_RECEIVE_WQ_CVLAN_STRIP : + 0), + }; + /* By default, FCS (CRC) is stripped by hardware. */ + if (rxq->crc_present) { + attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_SCATTER_FCS; + attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS; + } + if (priv->hw_padding) { + attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_RX_END_PADDING; + attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS; + } + tmpl->wq = ibv_exp_create_wq(priv->ctx, &attr.wq); + if (tmpl->wq == NULL) { + ERROR("%p: WQ creation failure", (void *)rxq_ctrl); + goto error; + } + /* + * Make sure number of WRs*SGEs match expectations since a queue + * cannot allocate more than "desc" buffers. + */ + if (((int)attr.wq.max_recv_wr != ((1 << rxq->elts_n) >> rxq->sges_n)) || + ((int)attr.wq.max_recv_sge != (1 << rxq->sges_n))) { + ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs", + (void *)rxq_ctrl, + ((1 << rxq->elts_n) >> rxq->sges_n), + (1 << rxq->sges_n), + attr.wq.max_recv_wr, attr.wq.max_recv_sge); + goto error; + } + /* Change queue state to ready. */ + mod = (struct ibv_exp_wq_attr){ + .attr_mask = IBV_EXP_WQ_ATTR_STATE, + .wq_state = IBV_EXP_WQS_RDY, + }; + ret = ibv_exp_modify_wq(tmpl->wq, &mod); + if (ret) { + ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed", + (void *)rxq_ctrl); + goto error; + } + /* Fill the rings. */ + rwq = container_of(tmpl->wq, struct mlx5_rwq, wq); + rxq->wqes = (volatile struct mlx5_wqe_data_seg (*)[]) + (uintptr_t)rwq->rq.buff; + for (i = 0; (i != (unsigned int)(1 << rxq->elts_n)); ++i) { + struct rte_mbuf *buf = (*rxq->elts)[i]; + volatile struct mlx5_wqe_data_seg *scat = &(*rxq->wqes)[i]; + + /* scat->addr must be able to store a pointer. 
*/ + assert(sizeof(scat->addr) >= sizeof(uintptr_t)); + *scat = (struct mlx5_wqe_data_seg){ + .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)), + .byte_count = htonl(DATA_LEN(buf)), + .lkey = tmpl->mr->lkey, + }; + } + rxq->rq_db = rwq->rq.db; + rxq->cqe_n = log2above(cq_info.cqe_cnt); + rxq->cq_ci = 0; + rxq->rq_ci = 0; + rxq->cq_db = cq_info.dbrec; + rxq->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf; + /* Update doorbell counter. */ + rxq->rq_ci = (1 << rxq->elts_n) >> rxq->sges_n; + rte_wmb(); + *rxq->rq_db = htonl(rxq->rq_ci); + DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl); + rte_atomic32_inc(&tmpl->refcnt); + DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void*)priv, + (void*)tmpl, rte_atomic32_read(&tmpl->refcnt)); + LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next); + return tmpl; +error: + if (tmpl->wq) + claim_zero(ibv_exp_destroy_wq(tmpl->wq)); + if (tmpl->cq) + claim_zero(ibv_destroy_cq(tmpl->cq)); + if (tmpl->channel) + claim_zero(ibv_destroy_comp_channel(tmpl->channel)); + if (tmpl->mr) + priv_mr_release(priv, tmpl->mr); + return NULL; + +} + +/** + * Get an Rx queue Verbs object. + * + * @param priv + * Pointer to private structure. + * @param idx + * Queue index in DPDK Rx queue array + * + * @return + * The Verbs object if it exists. + */ +struct mlx5_rxq_ibv* +mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx) +{ + struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx]; + struct mlx5_rxq_ctrl *ctrl = + container_of(rxq, struct mlx5_rxq_ctrl, rxq); + struct mlx5_mr *mr __rte_unused; + + if (ctrl->ibv) { + mr = priv_mr_get(priv, rxq->mp); + rte_atomic32_inc(&ctrl->ibv->refcnt); + DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void*)priv, + (void*)ctrl->ibv, rte_atomic32_read(&ctrl->ibv->refcnt)); + } + return ctrl->ibv; +} + +/** + * Release an Rx verbs queue object. + * + * @param priv + * Pointer to private structure. + * @param rxq + * Verbs Rx queue object. + * + * @return + * 0 on success, errno value on failure. + */ +int +mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq) +{ + int ret; + + assert(rxq->wq); + assert(rxq->cq); + assert(rxq->mr); + ret = priv_mr_release(priv, rxq->mr); + if (!ret) + rxq->mr = NULL; + DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void*)priv, + (void*)rxq, rte_atomic32_read(&rxq->refcnt)); + if (rte_atomic32_dec_and_test(&rxq->refcnt)) { + claim_zero(ibv_exp_destroy_wq(rxq->wq)); + claim_zero(ibv_destroy_cq(rxq->cq)); + if (rxq->channel) + claim_zero(ibv_destroy_comp_channel(rxq->channel)); + LIST_REMOVE(rxq, next); + rte_free(rxq); + return 0; + } + return EBUSY; +} + +/** + * Verify the Verbs Rx queue list is empty + * + * @param priv + * Pointer to private structure. + * + * @return the number of object not released. + */ +int +mlx5_priv_rxq_ibv_verify(struct priv *priv) +{ + int ret = 0; + struct mlx5_rxq_ibv *rxq; + + LIST_FOREACH(rxq, &priv->rxqsibv, next) { + DEBUG("%p: Verbs Rx queue %p still referenced", (void*)priv, + (void*)rxq); + ++ret; + } + return ret; +} + +/** + * Return true if a single reference exists on the object. + * + * @param priv + * Pointer to private structure. + * @param rxq + * Verbs Rx queue object. 
+ */ +int +mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq) +{ + (void)priv; + return (rte_atomic32_read(&rxq->refcnt) == 1); +} diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index c7c7518..abdbf6a 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -130,15 +130,24 @@ struct mlx5_rxq_data { struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */ } __rte_cache_aligned; -/* RX queue control descriptor. */ -struct mlx5_rxq_ctrl { - struct priv *priv; /* Back pointer to private data. */ +/* Verbs Rx queue elements. */ +struct mlx5_rxq_ibv { + LIST_ENTRY(mlx5_rxq_ibv) next; /* Pointer to the next element. */ + rte_atomic32_t refcnt; /* Reference counter. */ + struct mlx5_rxq_ctrl *rxq_ctrl; /* Back pointer to parent. */ struct ibv_cq *cq; /* Completion Queue. */ struct ibv_exp_wq *wq; /* Work Queue. */ - struct mlx5_mr *mr; /* Memory Region (for mp). */ struct ibv_comp_channel *channel; - unsigned int socket; /* CPU socket ID for allocations. */ + struct mlx5_mr *mr; /* Memory Region (for mp). */ +}; + +/* RX queue control descriptor. */ +struct mlx5_rxq_ctrl { + struct priv *priv; /* Back pointer to private data. */ + struct mlx5_rxq_ibv *ibv; /* Verbs elements. */ struct mlx5_rxq_data rxq; /* Data path structure. */ + unsigned int socket; /* CPU socket ID for allocations. */ + unsigned int memory_channel:1; /* Need memory channel. */ }; /* Hash RX queue types. */ @@ -298,7 +307,6 @@ void priv_destroy_hash_rxqs(struct priv *); int priv_allow_flow_type(struct priv *, enum hash_rxq_flow_type); int priv_rehash_flows(struct priv *); void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *); -int mlx5_rxq_rehash(struct rte_eth_dev *, struct mlx5_rxq_ctrl *); int mlx5_rxq_ctrl_setup(struct rte_eth_dev *, struct mlx5_rxq_ctrl *, uint16_t, unsigned int, const struct rte_eth_rxconf *, struct rte_mempool *); @@ -311,6 +319,11 @@ void priv_rx_intr_vec_disable(struct priv *priv); int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id); int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id); #endif /* HAVE_UPDATE_CQ_CI */ +struct mlx5_rxq_ibv* mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx); +struct mlx5_rxq_ibv* mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx); +int mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq); +int mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq); +int mlx5_priv_rxq_ibv_verify(struct priv *priv); /* mlx5_txq.c */ @@ -347,7 +360,6 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t); /* mlx5_mr.c */ -struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *); void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *); uint32_t mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *, unsigned int); diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c index 512052a..dffa1cd 100644 --- a/drivers/net/mlx5/mlx5_vlan.c +++ b/drivers/net/mlx5/mlx5_vlan.c @@ -153,7 +153,7 @@ priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on) .vlan_offloads = vlan_offloads, }; - err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod); + err = ibv_exp_modify_wq(rxq_ctrl->ibv->wq, &mod); if (err) { ERROR("%p: failed to modified stripping mode: %s", (void *)priv, strerror(err)); -- 2.1.4
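
For reviewers who want the lifetime model at a glance: the new mlx5_priv_rxq_ibv_* helpers implement a plain reference count on the Verbs object. mlx5_priv_rxq_ibv_new() creates the CQ/WQ/completion channel and takes the first reference, mlx5_priv_rxq_ibv_get() takes an extra reference for each additional user (flow rules, the Rx interrupt vector), mlx5_priv_rxq_ibv_release() drops one reference and only destroys the Verbs resources once the count reaches zero (returning EBUSY otherwise), and mlx5_priv_rxq_ibv_verify() reports objects still referenced at device close. The stand-alone sketch below only illustrates that new/get/release/verify pattern; it is not driver code, and every name in it (struct rxq_ibv, rxq_ibv_new(), and so on) is made up for the example.

/*
 * Stand-alone illustration of the new/get/release/verify scheme used for
 * the mlx5_rxq_ibv objects in this patch.  Hypothetical names, no Verbs
 * calls, plain int instead of rte_atomic32_t.
 */
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct rxq_ibv {
	LIST_ENTRY(rxq_ibv) next; /* Chained in the per-port list. */
	int refcnt;               /* Number of users of the object. */
	unsigned int idx;         /* Queue index, for messages only. */
};

LIST_HEAD(rxq_ibv_list, rxq_ibv);

/* Create the object with an initial reference (cf. mlx5_priv_rxq_ibv_new()). */
static struct rxq_ibv *
rxq_ibv_new(struct rxq_ibv_list *list, unsigned int idx)
{
	struct rxq_ibv *obj = calloc(1, sizeof(*obj));

	if (obj == NULL)
		return NULL;
	obj->idx = idx;
	obj->refcnt = 1;
	LIST_INSERT_HEAD(list, obj, next);
	return obj;
}

/* Take an extra reference for a new user (cf. mlx5_priv_rxq_ibv_get()). */
static struct rxq_ibv *
rxq_ibv_get(struct rxq_ibv *obj)
{
	if (obj != NULL)
		++obj->refcnt;
	return obj;
}

/* Drop one reference; destroy on last release (cf. mlx5_priv_rxq_ibv_release()). */
static int
rxq_ibv_release(struct rxq_ibv *obj)
{
	assert(obj->refcnt > 0);
	if (--obj->refcnt == 0) {
		LIST_REMOVE(obj, next);
		free(obj);
		return 0;
	}
	return EBUSY;
}

/* Count objects still referenced (cf. mlx5_priv_rxq_ibv_verify()). */
static int
rxq_ibv_verify(struct rxq_ibv_list *list)
{
	struct rxq_ibv *obj;
	int leaked = 0;

	LIST_FOREACH(obj, list, next)
		++leaked;
	return leaked;
}

int
main(void)
{
	struct rxq_ibv_list list = LIST_HEAD_INITIALIZER(list);
	struct rxq_ibv *q = rxq_ibv_new(&list, 0);

	if (q == NULL)
		return 1;
	rxq_ibv_get(q); /* e.g. a flow rule starts using queue 0. */
	printf("release: %d\n", rxq_ibv_release(q)); /* EBUSY, flow still holds it. */
	printf("release: %d\n", rxq_ibv_release(q)); /* 0, object destroyed. */
	printf("leaked: %d\n", rxq_ibv_verify(&list)); /* 0, list is empty. */
	return 0;
}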