From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id D7526A04F9; Fri, 10 Jan 2020 09:39:16 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id B5A611DA4F; Fri, 10 Jan 2020 09:39:15 +0100 (CET) Received: from mga04.intel.com (mga04.intel.com [192.55.52.120]) by dpdk.org (Postfix) with ESMTP id 3B6141DBDD for ; Thu, 9 Jan 2020 09:50:17 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by fmsmga104.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 09 Jan 2020 00:50:14 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.69,413,1571727600"; d="scan'208";a="226748986" Received: from unknown (HELO dpdk-zhangalvin-dev.sh.intel.com) ([10.240.179.50]) by fmsmga001.fm.intel.com with ESMTP; 09 Jan 2020 00:50:11 -0800 From: alvinx.zhang@intel.com To: haiyue.wang@intel.com; qi.z.zhang@intel.com; beilei.xing@intel.com; xiaolong.ye@intel.com; taox.zhu@intel.com Cc: dev@dpdk.org, Alvin Zhang Date: Thu, 9 Jan 2020 16:47:38 +0800 Message-Id: <1578559662-297138-2-git-send-email-alvinx.zhang@intel.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1578559662-297138-1-git-send-email-alvinx.zhang@intel.com> References: <1578559662-297138-1-git-send-email-alvinx.zhang@intel.com> X-Mailman-Approved-At: Fri, 10 Jan 2020 09:39:14 +0100 Subject: [dpdk-dev] [RFC 2/6] net/igc: igc poll mode driver X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Alvin Zhang link status, speed capabilities, Tx and Rx of packets, flow control, RSS, timestampping, VLAN filter, External VLAN, QinQ offload, base statistics, extend statistics, per queue statistics, Signed-off-by: Alvin Zhang --- drivers/net/igc/igc_ethdev.c | 2819 ++++++++++++++++++++++++++++++++++++++++++ drivers/net/igc/igc_ethdev.h | 179 +++ drivers/net/igc/igc_logs.c | 21 + drivers/net/igc/igc_logs.h | 48 + drivers/net/igc/igc_txrx.c | 2237 +++++++++++++++++++++++++++++++++ drivers/net/igc/igc_txrx.h | 56 + 6 files changed, 5360 insertions(+) create mode 100644 drivers/net/igc/igc_ethdev.c create mode 100644 drivers/net/igc/igc_ethdev.h create mode 100644 drivers/net/igc/igc_logs.c create mode 100644 drivers/net/igc/igc_logs.h create mode 100644 drivers/net/igc/igc_txrx.c create mode 100644 drivers/net/igc/igc_txrx.h diff --git a/drivers/net/igc/igc_ethdev.c b/drivers/net/igc/igc_ethdev.c new file mode 100644 index 0000000..87179cf --- /dev/null +++ b/drivers/net/igc/igc_ethdev.c @@ -0,0 +1,2819 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2020 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "igc_logs.h" +#include "igc_txrx.h" + +/* Per Queue Good Packets Received Count */ +#define IGC_PQGPRC(idx) (0x10010 + 0x100 * (idx)) +/* Per Queue Good Octets Received Count */ +#define IGC_PQGORC(idx) (0x10018 + 0x100 * (idx)) +/* Per Queue Good Octets Transmitted Count */ +#define IGC_PQGOTC(idx) (0x10034 + 0x100 * (idx)) +/* Per Queue Multicast Packets Received Count */ +#define IGC_PQMPRC(idx) (0x10038 + 0x100 * (idx)) +/* Transmit Queue Drop Packet Count */ +#define IGC_TQDPC(idx) (0xe030 + 0x40 * (idx)) + +#define IGC_FC_PAUSE_TIME 0x0680 +#define IGC_LINK_UPDATE_CHECK_TIMEOUT 90 /* 
9s */ +#define IGC_LINK_UPDATE_CHECK_INTERVAL 100 /* ms */ +#define IGC_MSIX_OTHER_INTR_VEC 0 /* MSI-X other interrupt vector */ + +#define IGC_DISABLE_TIMER0_MSK (1u << 31) +#define IGC_TIMADJ_MAX 999999900u /* nono-seconds */ +#define IGC_TIMADJ_SIGN_MINUS (1u << 31) /* 0b="+", 1b="-" */ +/* single cycle or multi-cycle */ +#define IGC_TIMADJ_METH_SINGLE (1u << 30) +/* Use the PHY sop indication or not */ +#define IGC_TSYNCRXCTL_RXSYNSIG (1u << 10) +#define IGC_TSYNCTXCTL_TXSYNSIG (1u << 5) + +#define IGC_DEFAULT_RX_FREE_THRESH 32 + +#define IGC_DEFAULT_RX_PTHRESH 8 +#define IGC_DEFAULT_RX_HTHRESH 8 +#define IGC_DEFAULT_RX_WTHRESH 4 + +#define IGC_DEFAULT_TX_PTHRESH 8 +#define IGC_DEFAULT_TX_HTHRESH 1 +#define IGC_DEFAULT_TX_WTHRESH 16 + +#define IGC_ALARM_INTERVAL 8000000u +/* us, about 13.6s some per-queue registers will wrap around back to 0. */ + +/* MSI-X other interrupt vector */ +#define IGC_MSIX_OTHER_INTR_VEC 0 + +/* External VLAN Enable bit mask */ +#define IGC_CTRL_EXT_EXT_VLAN (1 << 26) + +/* External VLAN Ether Type bit mask and shift */ +#define IGC_VET_EXT 0xFFFF0000 +#define IGC_VET_EXT_SHIFT 16 + +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN +#define U32_0_IN_U64 0 /* lower bytes of u64 */ +#define U32_1_IN_U64 1 /* higher bytes of u64 */ +#else +#define U32_0_IN_U64 1 +#define U32_1_IN_U64 0 +#endif + +static const struct rte_eth_desc_lim rx_desc_lim = { + .nb_max = IGC_MAX_RXD, + .nb_min = IGC_MIN_RXD, + .nb_align = IGC_RXD_ALIGN, +}; + +static const struct rte_eth_desc_lim tx_desc_lim = { + .nb_max = IGC_MAX_TXD, + .nb_min = IGC_MIN_TXD, + .nb_align = IGC_TXD_ALIGN, + .nb_seg_max = IGC_TX_MAX_SEG, + .nb_mtu_seg_max = IGC_TX_MAX_MTU_SEG, +}; + +static enum igc_fc_mode igc_fc_setting = igc_fc_full; + +static const struct rte_pci_id pci_id_igc_map[] = { + { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_LM) }, + { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_V) }, + { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_I) }, + { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_V) }, + { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_K) }, + { .vendor_id = 0, /* sentinel */ }, +}; + +/* store statistics names and its offset in stats structure */ +struct rte_igc_xstats_name_off { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + unsigned int offset; +}; + +static const struct rte_igc_xstats_name_off rte_igc_stats_strings[] = { + {"rx_crc_errors", offsetof(struct igc_hw_stats, crcerrs)}, + {"rx_align_errors", offsetof(struct igc_hw_stats, algnerrc)}, + {"rx_errors", offsetof(struct igc_hw_stats, rxerrc)}, + {"rx_missed_packets", offsetof(struct igc_hw_stats, mpc)}, + {"tx_single_collision_packets", offsetof(struct igc_hw_stats, scc)}, + {"tx_multiple_collision_packets", offsetof(struct igc_hw_stats, mcc)}, + {"tx_excessive_collision_packets", offsetof(struct igc_hw_stats, + ecol)}, + {"tx_late_collisions", offsetof(struct igc_hw_stats, latecol)}, + {"tx_total_collisions", offsetof(struct igc_hw_stats, colc)}, + {"tx_deferred_packets", offsetof(struct igc_hw_stats, dc)}, + {"tx_no_carrier_sense_packets", offsetof(struct igc_hw_stats, tncrs)}, + {"tx_discarded_packets", offsetof(struct igc_hw_stats, htdpmc)}, + {"rx_length_errors", offsetof(struct igc_hw_stats, rlec)}, + {"rx_xon_packets", offsetof(struct igc_hw_stats, xonrxc)}, + {"tx_xon_packets", offsetof(struct igc_hw_stats, xontxc)}, + {"rx_xoff_packets", offsetof(struct igc_hw_stats, xoffrxc)}, + {"tx_xoff_packets", offsetof(struct igc_hw_stats, xofftxc)}, + {"rx_flow_control_unsupported_packets", offsetof(struct 
igc_hw_stats, + fcruc)}, + {"rx_size_64_packets", offsetof(struct igc_hw_stats, prc64)}, + {"rx_size_65_to_127_packets", offsetof(struct igc_hw_stats, prc127)}, + {"rx_size_128_to_255_packets", offsetof(struct igc_hw_stats, prc255)}, + {"rx_size_256_to_511_packets", offsetof(struct igc_hw_stats, prc511)}, + {"rx_size_512_to_1023_packets", offsetof(struct igc_hw_stats, + prc1023)}, + {"rx_size_1024_to_max_packets", offsetof(struct igc_hw_stats, + prc1522)}, + {"rx_broadcast_packets", offsetof(struct igc_hw_stats, bprc)}, + {"rx_multicast_packets", offsetof(struct igc_hw_stats, mprc)}, + {"rx_undersize_errors", offsetof(struct igc_hw_stats, ruc)}, + {"rx_fragment_errors", offsetof(struct igc_hw_stats, rfc)}, + {"rx_oversize_errors", offsetof(struct igc_hw_stats, roc)}, + {"rx_jabber_errors", offsetof(struct igc_hw_stats, rjc)}, + {"rx_no_buffers", offsetof(struct igc_hw_stats, rnbc)}, + {"rx_management_packets", offsetof(struct igc_hw_stats, mgprc)}, + {"rx_management_dropped", offsetof(struct igc_hw_stats, mgpdc)}, + {"tx_management_packets", offsetof(struct igc_hw_stats, mgptc)}, + {"rx_total_packets", offsetof(struct igc_hw_stats, tpr)}, + {"tx_total_packets", offsetof(struct igc_hw_stats, tpt)}, + {"rx_total_bytes", offsetof(struct igc_hw_stats, tor)}, + {"tx_total_bytes", offsetof(struct igc_hw_stats, tot)}, + {"tx_size_64_packets", offsetof(struct igc_hw_stats, ptc64)}, + {"tx_size_65_to_127_packets", offsetof(struct igc_hw_stats, ptc127)}, + {"tx_size_128_to_255_packets", offsetof(struct igc_hw_stats, ptc255)}, + {"tx_size_256_to_511_packets", offsetof(struct igc_hw_stats, ptc511)}, + {"tx_size_512_to_1023_packets", offsetof(struct igc_hw_stats, + ptc1023)}, + {"tx_size_1023_to_max_packets", offsetof(struct igc_hw_stats, + ptc1522)}, + {"tx_multicast_packets", offsetof(struct igc_hw_stats, mptc)}, + {"tx_broadcast_packets", offsetof(struct igc_hw_stats, bptc)}, + {"tx_tso_packets", offsetof(struct igc_hw_stats, tsctc)}, + {"rx_sent_to_host_packets", offsetof(struct igc_hw_stats, rpthc)}, + {"tx_sent_by_host_packets", offsetof(struct igc_hw_stats, hgptc)}, + {"interrupt_assert_count", offsetof(struct igc_hw_stats, iac)}, + {"rx_descriptor_lower_threshold", + offsetof(struct igc_hw_stats, icrxdmtc)}, +}; + +#define IGC_NB_XSTATS (sizeof(rte_igc_stats_strings) / \ + sizeof(rte_igc_stats_strings[0])) + +static int eth_igc_configure(struct rte_eth_dev *dev); +static void eth_igc_stop(struct rte_eth_dev *dev); +static int eth_igc_start(struct rte_eth_dev *dev); +static int eth_igc_set_link_up(struct rte_eth_dev *dev); +static int eth_igc_set_link_down(struct rte_eth_dev *dev); +static void eth_igc_close(struct rte_eth_dev *dev); +static int eth_igc_reset(struct rte_eth_dev *dev); +static int eth_igc_promiscuous_enable(struct rte_eth_dev *dev); +static int eth_igc_promiscuous_disable(struct rte_eth_dev *dev); +static int eth_igc_allmulticast_enable(struct rte_eth_dev *dev); +static int eth_igc_allmulticast_disable(struct rte_eth_dev *dev); +static int eth_igc_link_update(struct rte_eth_dev *dev, int wait_to_complete); + +static int eth_igc_stats_get(struct rte_eth_dev *dev, + struct rte_eth_stats *rte_stats); +static int eth_igc_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstat *xstats, unsigned int n); +static int eth_igc_xstats_get_by_id(struct rte_eth_dev *dev, + const uint64_t *ids, + uint64_t *values, unsigned int n); +static int eth_igc_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + unsigned int size); +static int 
eth_igc_xstats_get_names_by_id(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, const uint64_t *ids, + unsigned int limit); +static int eth_igc_xstats_reset(struct rte_eth_dev *dev); +static int +eth_igc_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev, + uint16_t queue_id, uint8_t stat_idx, __rte_unused uint8_t is_rx); +static int eth_igc_fw_version_get(struct rte_eth_dev *dev, + char *fw_version, size_t fw_size); +static int eth_igc_infos_get(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info); +static const uint32_t *eth_igc_supported_ptypes_get(struct rte_eth_dev *dev); +static int eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); +static int +eth_igc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id); +static int +eth_igc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id); +static int +eth_igc_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on); +static int eth_igc_vlan_tpid_set(struct rte_eth_dev *dev, + enum rte_vlan_type vlan_type, uint16_t tpid); +static int eth_igc_vlan_offload_set(struct rte_eth_dev *dev, int mask); +static int eth_igc_led_on(struct rte_eth_dev *dev); +static int eth_igc_led_off(struct rte_eth_dev *dev); +static int +eth_igc_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); +static int +eth_igc_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); +static int eth_igc_rar_set(struct rte_eth_dev *dev, + struct rte_ether_addr *mac_addr, uint32_t index, uint32_t pool); +static void eth_igc_rar_clear(struct rte_eth_dev *dev, uint32_t index); +static int eth_igc_default_mac_addr_set(struct rte_eth_dev *dev, + struct rte_ether_addr *addr); +static int eth_igc_rss_reta_update(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); +static int eth_igc_rss_reta_query(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); +static int eth_igc_rss_hash_update(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf); +static int eth_igc_rss_hash_conf_get(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf); +static int eth_igc_set_mc_addr_list(struct rte_eth_dev *dev, + struct rte_ether_addr *mc_addr_set, + uint32_t nb_mc_addr); +static int eth_igc_get_eeprom_length(struct rte_eth_dev *dev); + +static int +eth_igc_timesync_adjust_time(struct rte_eth_dev *dev, int64_t delta); +static int +eth_igc_timesync_write_time(struct rte_eth_dev *dev, const struct timespec *ts); +static int +eth_igc_timesync_read_time(struct rte_eth_dev *dev, struct timespec *ts); + +static int eth_igc_timesync_enable(struct rte_eth_dev *dev); +static int eth_igc_timesync_disable(struct rte_eth_dev *dev); + +static int eth_igc_timesync_read_tx_timestamp(struct rte_eth_dev *dev, + struct timespec *timestamp); + +static const struct eth_dev_ops eth_igc_ops = { + .dev_configure = eth_igc_configure, + .dev_start = eth_igc_start, + .dev_stop = eth_igc_stop, + .dev_close = eth_igc_close, + .dev_reset = eth_igc_reset, + .dev_set_link_up = eth_igc_set_link_up, + .dev_set_link_down = eth_igc_set_link_down, + .promiscuous_enable = eth_igc_promiscuous_enable, + .promiscuous_disable = eth_igc_promiscuous_disable, + .allmulticast_enable = eth_igc_allmulticast_enable, + .allmulticast_disable = eth_igc_allmulticast_disable, + .link_update = eth_igc_link_update, + .stats_get = eth_igc_stats_get, + .xstats_get = eth_igc_xstats_get, + .xstats_get_by_id = eth_igc_xstats_get_by_id, + 
.xstats_get_names_by_id = eth_igc_xstats_get_names_by_id, + .xstats_get_names = eth_igc_xstats_get_names, + .stats_reset = eth_igc_xstats_reset, + .xstats_reset = eth_igc_xstats_reset, + .queue_stats_mapping_set = eth_igc_queue_stats_mapping_set, + .fw_version_get = eth_igc_fw_version_get, + .dev_infos_get = eth_igc_infos_get, + .dev_supported_ptypes_get = eth_igc_supported_ptypes_get, + .mtu_set = eth_igc_mtu_set, + .vlan_filter_set = eth_igc_vlan_filter_set, + .vlan_tpid_set = eth_igc_vlan_tpid_set, + .vlan_strip_queue_set = eth_igc_vlan_strip_queue_set, + .vlan_offload_set = eth_igc_vlan_offload_set, + .rx_queue_setup = eth_igc_rx_queue_setup, + .rx_queue_intr_enable = eth_igc_rx_queue_intr_enable, + .rx_queue_intr_disable = eth_igc_rx_queue_intr_disable, + .rx_queue_release = eth_igc_rx_queue_release, + .rx_queue_count = eth_igc_rx_queue_count, + .rx_descriptor_done = eth_igc_rx_descriptor_done, + .rx_descriptor_status = eth_igc_rx_descriptor_status, + .tx_descriptor_status = eth_igc_tx_descriptor_status, + .tx_queue_setup = eth_igc_tx_queue_setup, + .tx_queue_release = eth_igc_tx_queue_release, + .tx_done_cleanup = eth_igc_tx_done_cleanup, + .dev_led_on = eth_igc_led_on, + .dev_led_off = eth_igc_led_off, + .flow_ctrl_get = eth_igc_flow_ctrl_get, + .flow_ctrl_set = eth_igc_flow_ctrl_set, + .mac_addr_add = eth_igc_rar_set, + .mac_addr_remove = eth_igc_rar_clear, + .mac_addr_set = eth_igc_default_mac_addr_set, + .reta_update = eth_igc_rss_reta_update, + .reta_query = eth_igc_rss_reta_query, + .rss_hash_update = eth_igc_rss_hash_update, + .rss_hash_conf_get = eth_igc_rss_hash_conf_get, + .set_mc_addr_list = eth_igc_set_mc_addr_list, + .rxq_info_get = eth_igc_rxq_info_get, + .txq_info_get = eth_igc_txq_info_get, + .timesync_enable = eth_igc_timesync_enable, + .timesync_disable = eth_igc_timesync_disable, + .timesync_read_rx_timestamp = eth_igc_timesync_read_rx_timestamp, + .timesync_read_tx_timestamp = eth_igc_timesync_read_tx_timestamp, + .get_eeprom_length = eth_igc_get_eeprom_length, + .timesync_adjust_time = eth_igc_timesync_adjust_time, + .timesync_read_time = eth_igc_timesync_read_time, + .timesync_write_time = eth_igc_timesync_write_time, +}; + +/* + * multipe queue mode checking + */ +static int +igc_check_mq_mode(struct rte_eth_dev *dev) +{ + enum rte_eth_rx_mq_mode rx_mq_mode = dev->data->dev_conf.rxmode.mq_mode; + enum rte_eth_tx_mq_mode tx_mq_mode = dev->data->dev_conf.txmode.mq_mode; + + if ((rx_mq_mode & ETH_MQ_RX_DCB_FLAG) || + tx_mq_mode == ETH_MQ_TX_DCB || + tx_mq_mode == ETH_MQ_TX_VMDQ_DCB) { + PMD_INIT_LOG(ERR, "DCB mode is not supported."); + return -EINVAL; + } + + if (RTE_ETH_DEV_SRIOV(dev).active != 0) { + PMD_INIT_LOG(ERR, "SRIOV is not supported."); + return -EINVAL; + } + + if (rx_mq_mode != ETH_MQ_RX_NONE && + rx_mq_mode != ETH_MQ_RX_RSS) { + /* RSS together with VMDq not supported*/ + PMD_INIT_LOG(ERR, "RX mode %d is not supported.", + rx_mq_mode); + return -EINVAL; + } + + /* To no break software that set invalid mode, only display + * warning if invalid mode is used. + */ + if (tx_mq_mode != ETH_MQ_TX_NONE) + PMD_INIT_LOG(WARNING, "TX mode %d is not supported." 
+ " Due to txmode is meaningless in this driver," + " just ignore.", tx_mq_mode); + + return 0; +} + +static int +eth_igc_configure(struct rte_eth_dev *dev) +{ + struct igc_interrupt *intr = + IGC_DEV_PRIVATE_TO_INTR(dev->data->dev_private); + int ret; + + PMD_INIT_FUNC_TRACE(); + + ret = igc_check_mq_mode(dev); + if (ret != 0) + return ret; + + intr->flags |= IGC_FLAG_NEED_LINK_UPDATE; + return 0; +} + +static int +eth_igc_set_link_up(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + if (hw->phy.media_type == igc_media_type_copper) + igc_power_up_phy(hw); + else + igc_power_up_fiber_serdes_link(hw); + return 0; +} + +static int +eth_igc_set_link_down(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + if (hw->phy.media_type == igc_media_type_copper) + igc_power_down_phy(hw); + else + igc_shutdown_fiber_serdes_link(hw); + return 0; +} + +/* + * rx,tx enable/disable + */ +static void +eth_igc_rxtx_control(struct rte_eth_dev *dev, bool enable) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t tctl, rctl; + + tctl = IGC_READ_REG(hw, IGC_TCTL); + rctl = IGC_READ_REG(hw, IGC_RCTL); + + if (enable) { + /* enable Tx/Rx */ + tctl |= IGC_TCTL_EN; + rctl |= IGC_RCTL_EN; + } else { + /* disable Tx/Rx */ + tctl &= ~IGC_TCTL_EN; + rctl &= ~IGC_RCTL_EN; + } + IGC_WRITE_REG(hw, IGC_TCTL, tctl); + IGC_WRITE_REG(hw, IGC_RCTL, rctl); + IGC_WRITE_FLUSH(hw); +} + +/* + * disable other interrupt + */ +static void +igc_intr_other_disable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + + if (rte_intr_allow_others(intr_handle) && + dev->data->dev_conf.intr_conf.lsc != 0) { + IGC_WRITE_REG(hw, IGC_EIMC, 1 << IGC_MSIX_OTHER_INTR_VEC); + } + + IGC_WRITE_REG(hw, IGC_IMC, ~0); + IGC_WRITE_FLUSH(hw); +} + +/* + * enable other interrupt + */ +static inline void +igc_intr_other_enable(struct rte_eth_dev *dev) +{ + struct igc_interrupt *intr = + IGC_DEV_PRIVATE_TO_INTR(dev->data->dev_private); + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + + if (rte_intr_allow_others(intr_handle) && + dev->data->dev_conf.intr_conf.lsc != 0) { + IGC_WRITE_REG(hw, IGC_EIMS, 1 << IGC_MSIX_OTHER_INTR_VEC); + } + + IGC_WRITE_REG(hw, IGC_IMS, intr->mask); + IGC_WRITE_FLUSH(hw); +} + +/* + * It reads ICR and gets interrupt causes, check it and set a bit flag + * to update link status. 
+ */ +static void +eth_igc_interrupt_get_status(struct rte_eth_dev *dev) +{ + uint32_t icr; + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct igc_interrupt *intr = + IGC_DEV_PRIVATE_TO_INTR(dev->data->dev_private); + + /* read-on-clear nic registers here */ + icr = IGC_READ_REG(hw, IGC_ICR); + + intr->flags = 0; + if (icr & IGC_ICR_LSC) + intr->flags |= IGC_FLAG_NEED_LINK_UPDATE; +} + +/* return 0 means link status changed, -1 means not changed */ +static int +eth_igc_link_update(struct rte_eth_dev *dev, int wait_to_complete) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_eth_link link; + int link_check, count; + + link_check = 0; + hw->mac.get_link_status = 1; + + /* possible wait-to-complete in up to 9 seconds */ + for (count = 0; count < IGC_LINK_UPDATE_CHECK_TIMEOUT; count++) { + /* Read the real link status */ + switch (hw->phy.media_type) { + case igc_media_type_copper: + /* Do the work to read phy */ + igc_check_for_link(hw); + link_check = !hw->mac.get_link_status; + break; + + case igc_media_type_fiber: + igc_check_for_link(hw); + link_check = (IGC_READ_REG(hw, IGC_STATUS) & + IGC_STATUS_LU); + break; + + case igc_media_type_internal_serdes: + igc_check_for_link(hw); + link_check = hw->mac.serdes_has_link; + break; + + default: + break; + } + if (link_check || wait_to_complete == 0) + break; + rte_delay_ms(IGC_LINK_UPDATE_CHECK_INTERVAL); + } + memset(&link, 0, sizeof(link)); + + /* Now we check if a transition has happened */ + if (link_check) { + uint16_t duplex, speed; + hw->mac.ops.get_link_up_info(hw, &speed, &duplex); + link.link_duplex = (duplex == FULL_DUPLEX) ? + ETH_LINK_FULL_DUPLEX : + ETH_LINK_HALF_DUPLEX; + link.link_speed = speed; + link.link_status = ETH_LINK_UP; + link.link_autoneg = !(dev->data->dev_conf.link_speeds & + ETH_LINK_SPEED_FIXED); + + if (speed == SPEED_2500) { + uint32_t tipg = IGC_READ_REG(hw, IGC_TIPG); + if ((tipg & IGC_TIPG_IPGT_MASK) != 0x0b) { + tipg &= ~IGC_TIPG_IPGT_MASK; + tipg |= 0x0b; + IGC_WRITE_REG(hw, IGC_TIPG, tipg); + } + } + } else if (!link_check) { + link.link_speed = 0; + link.link_duplex = ETH_LINK_HALF_DUPLEX; + link.link_status = ETH_LINK_DOWN; + link.link_autoneg = ETH_LINK_FIXED; + } + + return rte_eth_linkstatus_set(dev, &link); +} + +/* + * It executes link_update after knowing an interrupt is present. + */ +static void +eth_igc_interrupt_action(struct rte_eth_dev *dev) +{ + struct igc_interrupt *intr = + IGC_DEV_PRIVATE_TO_INTR(dev->data->dev_private); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_eth_link link; + int ret; + + if (intr->flags & IGC_FLAG_NEED_LINK_UPDATE) { + intr->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; + + /* set get_link_status to check register later */ + ret = eth_igc_link_update(dev, 0); + + /* check if link has changed */ + if (ret < 0) + return; + + rte_eth_linkstatus_get(dev, &link); + if (link.link_status) + PMD_INIT_LOG(INFO, + " Port %d: Link Up - speed %u Mbps - %s", + dev->data->port_id, + (unsigned int)link.link_speed, + link.link_duplex == ETH_LINK_FULL_DUPLEX ? + "full-duplex" : "half-duplex"); + else + PMD_INIT_LOG(INFO, " Port %d: Link Down", + dev->data->port_id); + + PMD_INIT_LOG(DEBUG, "PCI Address: %04d:%02d:%02d:%d", + pci_dev->addr.domain, + pci_dev->addr.bus, + pci_dev->addr.devid, + pci_dev->addr.function); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, + NULL); + } +} + +/* + * Interrupt handler which shall be registered at first. 
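+ * It reads the interrupt causes via eth_igc_interrupt_get_status() and
+ * then lets eth_igc_interrupt_action() update the link state if needed.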
+ * + * @handle + * Pointer to interrupt handle. + * @param + * The address of parameter (struct rte_eth_dev *) regsitered before. + */ +static void +eth_igc_interrupt_handler(void *param) +{ + struct rte_eth_dev *dev = (struct rte_eth_dev *)param; + + eth_igc_interrupt_get_status(dev); + eth_igc_interrupt_action(dev); +} + +static void igc_read_queue_stats_register(struct rte_eth_dev *dev); + +/* + * Update the queue status every IGC_ALARM_INTERVAL time. + * @param + * The address of parameter (struct rte_eth_dev *) regsitered before. + */ +static void +igc_update_queue_stats_handler(void *param) +{ + struct rte_eth_dev *dev = param; + igc_read_queue_stats_register(dev); + rte_eal_alarm_set(IGC_ALARM_INTERVAL, + igc_update_queue_stats_handler, dev); +} + +/* + * This routine disables all traffic on the adapter by issuing a + * global reset on the MAC. + */ +static void +eth_igc_stop(struct rte_eth_dev *dev) +{ + struct igc_adapter *adapter = + IGC_DEV_PRIVATE(dev->data->dev_private); + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_eth_link link; + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + + adapter->stopped = 1; + + /* disable receive and transmit */ + eth_igc_rxtx_control(dev, false); + + /* disable all MSI-X interrupts */ + IGC_WRITE_REG(hw, IGC_EIMC, 0x1f); + IGC_WRITE_FLUSH(hw); + + igc_intr_other_disable(dev); + + rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev); + + /* disable intr eventfd mapping */ + rte_intr_disable(intr_handle); + + igc_reset_hw(hw); + + /* disable all wake up */ + IGC_WRITE_REG(hw, IGC_WUC, 0); + + /* Set bit for Go Link disconnect */ + if (hw->mac.type >= igc_82580) { + uint32_t phpm_reg; + + phpm_reg = IGC_READ_REG(hw, IGC_82580_PHY_POWER_MGMT); + phpm_reg |= IGC_82580_PM_GO_LINKD; + IGC_WRITE_REG(hw, IGC_82580_PHY_POWER_MGMT, phpm_reg); + } + + /* Power down the phy. 
Needed to make the link go Down */ + eth_igc_set_link_down(dev); + + igc_dev_clear_queues(dev); + + /* clear the recorded link status */ + memset(&link, 0, sizeof(link)); + rte_eth_linkstatus_set(dev, &link); + + if (!rte_intr_allow_others(intr_handle)) + /* resume to the default handler */ + rte_intr_callback_register(intr_handle, + eth_igc_interrupt_handler, + (void *)dev); + + /* Clean datapath event and queue/vec mapping */ + rte_intr_efd_disable(intr_handle); + if (intr_handle->intr_vec != NULL) { + rte_free(intr_handle->intr_vec); + intr_handle->intr_vec = NULL; + } +} + +/* + * write interrupt vector allocation register + * @hw + * board private structure + * @queue_index + * queue index, valid 0,1,2,3 + * @tx + * tx:1, rx:0 + * @msix_vector + * msix-vector, valid 0,1,2,3,4 + */ +static void +igc_write_ivar(struct igc_hw *hw, uint8_t queue_index, + bool tx, uint8_t msix_vector) +{ + uint8_t offset = 0; + uint8_t reg_index = queue_index >> 1; + uint32_t val; + + /* + * IVAR(0) + * bit31...24 bit23...16 bit15...8 bit7...0 + * TX1 RX1 TX0 RX0 + * + * IVAR(1) + * bit31...24 bit23...16 bit15...8 bit7...0 + * TX3 RX3 TX2 RX2 + */ + + if (tx) + offset = 8; + + if (queue_index & 1) + offset += 16; + + val = IGC_READ_REG_ARRAY(hw, IGC_IVAR0, reg_index); + + /* clear bits */ + val &= ~((uint32_t)0xFF << offset); + + /* write vector and valid bit */ + val |= (msix_vector | IGC_IVAR_VALID) << offset; + + IGC_WRITE_REG_ARRAY(hw, IGC_IVAR0, reg_index, val); +} + +/* Sets up the hardware to generate MSI-X interrupts properly + * @hw + * board private structure + */ +static void +eth_igc_configure_msix_intr(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + + uint32_t regval, intr_mask; + uint32_t vec = IGC_MISC_VEC_ID; + uint32_t base = IGC_MISC_VEC_ID; + uint32_t misc_shift = 0; + int i; + + /* won't configure msix register if no mapping is done + * between intr vector and event fd + */ + if (!rte_intr_dp_is_en(intr_handle)) + return; + + if (rte_intr_allow_others(intr_handle)) { + base = IGC_RX_VEC_START; + vec = base; + misc_shift = 1; + } + + /* turn on MSI-X capability first */ + IGC_WRITE_REG(hw, IGC_GPIE, IGC_GPIE_MSIX_MODE | + IGC_GPIE_PBA | IGC_GPIE_EIAME | + IGC_GPIE_NSICR); + intr_mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) << + misc_shift; + + if (dev->data->dev_conf.intr_conf.lsc != 0) + intr_mask |= (1 << IGC_MSIX_OTHER_INTR_VEC); + + /* enable msix auto-clear */ + regval = IGC_READ_REG(hw, IGC_EIAC); + IGC_WRITE_REG(hw, IGC_EIAC, regval | intr_mask); + + /* set other cause interrupt vector */ + regval = (IGC_MSIX_OTHER_INTR_VEC | IGC_IVAR_VALID) << 8; + IGC_WRITE_REG(hw, IGC_IVAR_MISC, regval); + + /* disable auto-mask */ + IGC_WRITE_REG(hw, IGC_EIAM, 0); + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + igc_write_ivar(hw, i, 0, vec); + intr_handle->intr_vec[i] = vec; + if (vec < base + intr_handle->nb_efd - 1) + vec++; + } + + IGC_WRITE_FLUSH(hw); +} + +static int +eth_igc_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct igc_vfta *shadow_vfta = + IGC_DEV_PRIVATE_TO_VFTA(dev->data->dev_private); + uint32_t vfta; + uint32_t vid_idx; + uint32_t vid_bit; + + vid_idx = (vlan_id >> IGC_VFTA_ENTRY_SHIFT) & IGC_VFTA_ENTRY_MASK; + vid_bit = 1u << (vlan_id & IGC_VFTA_ENTRY_BIT_SHIFT_MASK); + vfta = 
IGC_READ_REG_ARRAY(hw, IGC_VFTA, vid_idx); + if (on) + vfta |= vid_bit; + else + vfta &= ~vid_bit; + IGC_WRITE_REG_ARRAY(hw, IGC_VFTA, vid_idx, vfta); + + /* update local VFTA copy */ + shadow_vfta->vfta[vid_idx] = vfta; + + return 0; +} + +static int +eth_igc_vlan_tpid_set(struct rte_eth_dev *dev, + enum rte_vlan_type vlan_type, + uint16_t tpid) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t reg_val, qinq; + + qinq = IGC_READ_REG(hw, IGC_CTRL_EXT); + qinq &= IGC_CTRL_EXT_EXT_VLAN; + + /* only outer TPID of double VLAN can be configured*/ + if (qinq && vlan_type == ETH_VLAN_TYPE_OUTER) { + reg_val = IGC_READ_REG(hw, IGC_VET); + reg_val = (reg_val & (~IGC_VET_EXT)) | + ((uint32_t)tpid << IGC_VET_EXT_SHIFT); + IGC_WRITE_REG(hw, IGC_VET, reg_val); + + return 0; + } + + /* all other TPID values are read-only*/ + PMD_DRV_LOG(ERR, "Not supported"); + return -ENOTSUP; +} + +static void +igc_vlan_hw_filter_disable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t reg_val; + + /* Filter Table Disable */ + reg_val = IGC_READ_REG(hw, IGC_RCTL); + reg_val &= ~(IGC_RCTL_CFIEN | IGC_RCTL_VFE); + + IGC_WRITE_REG(hw, IGC_RCTL, reg_val); +} + +static void +igc_vlan_hw_filter_enable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct igc_vfta *shadow_vfta = + IGC_DEV_PRIVATE_TO_VFTA(dev->data->dev_private); + uint32_t reg_val; + int i; + + /* Filter Table Enable, CFI not used for packet acceptance */ + reg_val = IGC_READ_REG(hw, IGC_RCTL); + reg_val &= ~IGC_RCTL_CFIEN; + reg_val |= IGC_RCTL_VFE; + IGC_WRITE_REG(hw, IGC_RCTL, reg_val); + + /* restore VFTA table */ + for (i = 0; i < IGC_VFTA_SIZE; i++) + IGC_WRITE_REG_ARRAY(hw, IGC_VFTA, i, shadow_vfta->vfta[i]); +} + +static void +igc_vlan_hw_strip_disable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t reg_val; + + /* VLAN Mode Disable */ + reg_val = IGC_READ_REG(hw, IGC_CTRL); + reg_val &= ~IGC_CTRL_VME; + IGC_WRITE_REG(hw, IGC_CTRL, reg_val); +} + +static void +igc_vlan_hw_strip_enable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t reg_val; + + /* VLAN Mode Enable */ + reg_val = IGC_READ_REG(hw, IGC_CTRL); + reg_val |= IGC_CTRL_VME; + IGC_WRITE_REG(hw, IGC_CTRL, reg_val); +} + +static void +igc_vlan_hw_extend_disable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t reg_val; + + /* CTRL_EXT: Extended VLAN */ + reg_val = IGC_READ_REG(hw, IGC_CTRL_EXT); + reg_val &= ~IGC_CTRL_EXT_EXTEND_VLAN; + IGC_WRITE_REG(hw, IGC_CTRL_EXT, reg_val); + + /* Update maximum packet length */ + if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) + IGC_WRITE_REG(hw, IGC_RLPML, + dev->data->dev_conf.rxmode.max_rx_pkt_len + + VLAN_TAG_SIZE); +} + +static void +igc_vlan_hw_extend_enable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t reg_val; + + /* CTRL_EXT: Extended VLAN */ + reg_val = IGC_READ_REG(hw, IGC_CTRL_EXT); + reg_val |= IGC_CTRL_EXT_EXTEND_VLAN; + IGC_WRITE_REG(hw, IGC_CTRL_EXT, reg_val); + + /* Update maximum packet length */ + if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) + IGC_WRITE_REG(hw, IGC_RLPML, + dev->data->dev_conf.rxmode.max_rx_pkt_len + + 2 * VLAN_TAG_SIZE); +} + +static int +eth_igc_vlan_offload_set(struct rte_eth_dev 
*dev, int mask) +{ + struct rte_eth_rxmode *rxmode; + + rxmode = &dev->data->dev_conf.rxmode; + if (mask & ETH_VLAN_STRIP_MASK) { + if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP) + igc_vlan_hw_strip_enable(dev); + else + igc_vlan_hw_strip_disable(dev); + } + + if (mask & ETH_VLAN_FILTER_MASK) { + if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_FILTER) + igc_vlan_hw_filter_enable(dev); + else + igc_vlan_hw_filter_disable(dev); + } + + if (mask & ETH_VLAN_EXTEND_MASK) { + if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_EXTEND) + igc_vlan_hw_extend_enable(dev); + else + igc_vlan_hw_extend_disable(dev); + } + + return 0; +} + +/** + * It enables the interrupt mask and then enable the interrupt. + * + * @dev + * Pointer to struct rte_eth_dev. + * @on + * Enable or Disable + */ +static void +igc_lsc_interrupt_setup(struct rte_eth_dev *dev, uint8_t on) +{ + struct igc_interrupt *intr = + IGC_DEV_PRIVATE_TO_INTR(dev->data->dev_private); + + if (on) + intr->mask |= IGC_ICR_LSC; + else + intr->mask &= ~IGC_ICR_LSC; +} + +/* + * It enables the interrupt. + * It will be called once only during nic initialized. + */ +static void +igc_rxq_interrupt_setup(struct rte_eth_dev *dev) +{ + uint32_t mask; + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + int misc_shift = rte_intr_allow_others(intr_handle) ? 1 : 0; + + /* won't configure msix register if no mapping is done + * between intr vector and event fd + */ + if (!rte_intr_dp_is_en(intr_handle)) + return; + + mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) << misc_shift; + IGC_WRITE_REG(hw, IGC_EIMS, mask); +} + +/* + * Get hardware rx-buffer size. + */ +static inline int +igc_get_rx_buffer_size(struct igc_hw *hw) +{ + return (IGC_READ_REG(hw, IGC_RXPBS) & 0x3f) << 10; +} + +/* + * free all rx/tx queues. + */ +static void +igc_dev_free_queues(struct rte_eth_dev *dev) +{ + uint16_t i; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + eth_igc_rx_queue_release(dev->data->rx_queues[i]); + dev->data->rx_queues[i] = NULL; + } + dev->data->nb_rx_queues = 0; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + eth_igc_tx_queue_release(dev->data->tx_queues[i]); + dev->data->tx_queues[i] = NULL; + } + dev->data->nb_tx_queues = 0; +} + +/* + * igc_hw_control_acquire sets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means + * that the driver is loaded. + */ +static void +igc_hw_control_acquire(struct igc_hw *hw) +{ + uint32_t ctrl_ext; + + /* Let firmware know the driver has taken over */ + ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT); + IGC_WRITE_REG(hw, IGC_CTRL_EXT, ctrl_ext | IGC_CTRL_EXT_DRV_LOAD); +} + +/* + * igc_hw_control_release resets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that the + * driver is no longer loaded. + */ +static void +igc_hw_control_release(struct igc_hw *hw) +{ + uint32_t ctrl_ext; + + /* Let firmware taken over control of h/w */ + ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT); + IGC_WRITE_REG(hw, IGC_CTRL_EXT, + ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD); +} + +static int +igc_hardware_init(struct igc_hw *hw) +{ + uint32_t rx_buf_size; + int diag; + + /* Let the firmware know the OS is in control */ + igc_hw_control_acquire(hw); + + /* + * These parameters control the automatic generation (Tx) and + * response (Rx) to Ethernet PAUSE frames. 
+ * - High water mark should allow for at least two standard size (1518) + * frames to be received after sending an XOFF. + * - Low water mark works best when it is very near the high water mark. + * This allows the receiver to restart by sending XON when it has + * drained a bit. Here we use an arbitrary value of 1500 which will + * restart after one full frame is pulled from the buffer. There + * could be several smaller frames in the buffer and if so they will + * not trigger the XON until their total number reduces the buffer + * by 1500. + */ + rx_buf_size = igc_get_rx_buffer_size(hw); + hw->fc.high_water = rx_buf_size - (RTE_ETHER_MAX_LEN * 2); + hw->fc.low_water = hw->fc.high_water - 1500; + hw->fc.pause_time = IGC_FC_PAUSE_TIME; + hw->fc.send_xon = 1; + + /* Set Flow control, use the tunable location if sane */ + if (igc_fc_setting != igc_fc_none && igc_fc_setting < 4) + hw->fc.requested_mode = igc_fc_setting; + else + hw->fc.requested_mode = igc_fc_none; + + /* Issue a global reset */ + igc_reset_hw(hw); + + /* disable all wake up */ + IGC_WRITE_REG(hw, IGC_WUC, 0); + + diag = igc_init_hw(hw); + if (diag < 0) + return diag; + + /* write vlan ethernet type */ + IGC_WRITE_REG(hw, IGC_VET, + RTE_ETHER_TYPE_VLAN << 16 | RTE_ETHER_TYPE_VLAN); + + igc_get_phy_info(hw); + igc_check_for_link(hw); + + return 0; +} + +static int +eth_igc_start(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct igc_adapter *adapter = + IGC_DEV_PRIVATE(dev->data->dev_private); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + uint32_t *speeds; + uint32_t intr_vector = 0; + int ret, mask; + int num_speeds; + bool autoneg; + + PMD_INIT_FUNC_TRACE(); + + /* disable all MSI-X interrupts */ + IGC_WRITE_REG(hw, IGC_EIMC, 0x1f); + IGC_WRITE_FLUSH(hw); + + /* disable uio/vfio intr/eventfd mapping */ + rte_intr_disable(intr_handle); + + /* Power up the phy. 
Needed to make the link go Up */ + eth_igc_set_link_up(dev); + + /* Put the address into the Receive Address Array */ + igc_rar_set(hw, hw->mac.addr, 0); + + /* Initialize the hardware */ + if (igc_hardware_init(hw)) { + PMD_INIT_LOG(ERR, "Unable to initialize the hardware"); + return -EIO; + } + adapter->stopped = 0; + + IGC_WRITE_REG(hw, IGC_VET, + RTE_ETHER_TYPE_VLAN << 16 | RTE_ETHER_TYPE_VLAN); + + /* check and configure queue intr-vector mapping */ + if ((rte_intr_cap_multiple(intr_handle) || + !RTE_ETH_DEV_SRIOV(dev).active) && + dev->data->dev_conf.intr_conf.rxq != 0) { + intr_vector = dev->data->nb_rx_queues; + if (rte_intr_efd_enable(intr_handle, intr_vector)) + return -1; + } + + if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) { + intr_handle->intr_vec = + rte_zmalloc("intr_vec", + dev->data->nb_rx_queues * sizeof(int), 0); + if (intr_handle->intr_vec == NULL) { + PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" + " intr_vec", dev->data->nb_rx_queues); + return -ENOMEM; + } + } + + /* confiugre msix for rx interrupt */ + eth_igc_configure_msix_intr(dev); + + igc_tx_init(dev); + + /* This can fail when allocating mbufs for descriptor rings */ + ret = igc_rx_init(dev); + if (ret) { + PMD_INIT_LOG(ERR, "Unable to initialize RX hardware"); + igc_dev_clear_queues(dev); + return ret; + } + + igc_clear_hw_cntrs_base_generic(hw); + + /* + * VLAN Offload Settings + */ + mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK | + ETH_VLAN_EXTEND_MASK; + ret = eth_igc_vlan_offload_set(dev, mask); + if (ret) { + PMD_INIT_LOG(ERR, "Unable to set vlan offload"); + igc_dev_clear_queues(dev); + return ret; + } + + /* Setup link speed and duplex */ + speeds = &dev->data->dev_conf.link_speeds; + if (*speeds == ETH_LINK_SPEED_AUTONEG) { + hw->phy.autoneg_advertised = IGC_ALL_SPEED_DUPLEX_2500; + hw->mac.autoneg = 1; + } else { + num_speeds = 0; + autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0; + + /* Reset */ + hw->phy.autoneg_advertised = 0; + + if (*speeds & ~(ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | + ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M | + ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G | + ETH_LINK_SPEED_FIXED)) { + num_speeds = -1; + goto error_invalid_config; + } + if (*speeds & ETH_LINK_SPEED_10M_HD) { + hw->phy.autoneg_advertised |= ADVERTISE_10_HALF; + num_speeds++; + } + if (*speeds & ETH_LINK_SPEED_10M) { + hw->phy.autoneg_advertised |= ADVERTISE_10_FULL; + num_speeds++; + } + if (*speeds & ETH_LINK_SPEED_100M_HD) { + hw->phy.autoneg_advertised |= ADVERTISE_100_HALF; + num_speeds++; + } + if (*speeds & ETH_LINK_SPEED_100M) { + hw->phy.autoneg_advertised |= ADVERTISE_100_FULL; + num_speeds++; + } + if (*speeds & ETH_LINK_SPEED_1G) { + hw->phy.autoneg_advertised |= ADVERTISE_1000_FULL; + num_speeds++; + } + if (*speeds & ETH_LINK_SPEED_2_5G) { + hw->phy.autoneg_advertised |= ADVERTISE_2500_FULL; + num_speeds++; + } + if (num_speeds == 0 || (!autoneg && num_speeds > 1)) + goto error_invalid_config; + + /* Set/reset the mac.autoneg based on the link speed, + * fixed or not + */ + if (!autoneg) { + hw->mac.autoneg = 0; + hw->mac.forced_speed_duplex = + hw->phy.autoneg_advertised; + } else { + hw->mac.autoneg = 1; + } + } + + igc_setup_link(hw); + + if (rte_intr_allow_others(intr_handle)) { + /* check if lsc interrupt is enabled */ + if (dev->data->dev_conf.intr_conf.lsc != 0) + igc_lsc_interrupt_setup(dev, TRUE); + else + igc_lsc_interrupt_setup(dev, FALSE); + } else { + rte_intr_callback_unregister(intr_handle, + eth_igc_interrupt_handler, + (void *)dev); + if 
(dev->data->dev_conf.intr_conf.lsc != 0) + PMD_INIT_LOG(INFO, "lsc won't enable because of" + " no intr multiplex"); + } + + /* enable uio/vfio intr/eventfd mapping */ + rte_intr_enable(intr_handle); + + rte_eal_alarm_set(IGC_ALARM_INTERVAL, + igc_update_queue_stats_handler, dev); + + /* check if rxq interrupt is enabled */ + if (dev->data->dev_conf.intr_conf.rxq != 0 && + rte_intr_dp_is_en(intr_handle)) + igc_rxq_interrupt_setup(dev); + + /* resume enabled intr since hw reset */ + igc_intr_other_enable(dev); + + eth_igc_rxtx_control(dev, true); + eth_igc_link_update(dev, 0); + + PMD_INIT_LOG(DEBUG, "<<"); + return 0; + +error_invalid_config: + PMD_INIT_LOG(ERR, "Invalid advertised speeds (%u) for port %u", + dev->data->dev_conf.link_speeds, dev->data->port_id); + igc_dev_clear_queues(dev); + return -EINVAL; +} + +static int +igc_reset_swfw_lock(struct igc_hw *hw) +{ + int ret_val; + + /* + * Do mac ops initialization manually here, since we will need + * some function pointers set by this call. + */ + ret_val = igc_init_mac_params(hw); + if (ret_val) + return ret_val; + + /* + * SMBI lock should not fail in this early stage. If this is the case, + * it is due to an improper exit of the application. + * So force the release of the faulty lock. + */ + if (igc_get_hw_semaphore_generic(hw) < 0) + PMD_DRV_LOG(DEBUG, "SMBI lock released"); + + igc_put_hw_semaphore_generic(hw); + + if (hw->mac.ops.acquire_swfw_sync != NULL) { + uint16_t mask; + + /* + * Phy lock should not fail in this early stage. + * If this is the case, it is due to an improper exit of the + * application. So force the release of the faulty lock. + */ + mask = IGC_SWFW_PHY0_SM; + if (hw->mac.ops.acquire_swfw_sync(hw, mask) < 0) { + PMD_DRV_LOG(DEBUG, "SWFW phy%d lock released", + hw->bus.func); + } + hw->mac.ops.release_swfw_sync(hw, mask); + + /* + * This one is more tricky since it is common to all ports; but + * swfw_sync retries last long enough (1s) to be almost sure + * that if lock can not be taken it is due to an improper lock + * of the semaphore. 
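+ * The EEPROM semaphore is shared by every function of the device, so a
+ * stale lock here would stall all ports, not only this one.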
+ */ + mask = IGC_SWFW_EEP_SM; + if (hw->mac.ops.acquire_swfw_sync(hw, mask) < 0) + PMD_DRV_LOG(DEBUG, "SWFW common locks released"); + + hw->mac.ops.release_swfw_sync(hw, mask); + } + + return IGC_SUCCESS; +} + +static void +eth_igc_close(struct rte_eth_dev *dev) +{ + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct igc_adapter *adapter = + IGC_DEV_PRIVATE(dev->data->dev_private); + int retry = 0; + + PMD_INIT_FUNC_TRACE(); + + if (!adapter->stopped) + eth_igc_stop(dev); + + /* disable all MSI-X interrupts */ + IGC_WRITE_REG(hw, IGC_EIMC, 0x1f); + IGC_WRITE_FLUSH(hw); + + igc_intr_other_disable(dev); + do { + int ret = rte_intr_callback_unregister(intr_handle, + eth_igc_interrupt_handler, dev); + if (ret >= 0 || ret == -ENOENT || ret == -EINVAL) + break; + + PMD_INIT_LOG(ERR, "intr callback unregister failed: %d", ret); + DELAY(200 * 1000); /* delay 200ms */ + } while (retry++ < 5); + + igc_phy_hw_reset(hw); + igc_hw_control_release(hw); + igc_dev_free_queues(dev); + + dev->dev_ops = NULL; + dev->rx_pkt_burst = NULL; + dev->tx_pkt_burst = NULL; + + /* Reset any pending lock */ + igc_reset_swfw_lock(hw); +} + +static void +igc_identify_hardware(struct rte_eth_dev *dev, struct rte_pci_device *pci_dev) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + hw->vendor_id = pci_dev->id.vendor_id; + hw->device_id = pci_dev->id.device_id; + hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id; + hw->subsystem_device_id = pci_dev->id.subsystem_device_id; +} + +static int +eth_igc_dev_init(struct rte_eth_dev *eth_dev) +{ + int error = 0; + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct igc_adapter *adapter = + IGC_DEV_PRIVATE(eth_dev->data->dev_private); + + eth_dev->dev_ops = ð_igc_ops; + + /* + * for secondary processes, we don't initialize any further as primary + * has already done this work. Only check we don't need a different + * RX function. + */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + rte_eth_copy_pci_info(eth_dev, pci_dev); + + hw->back = pci_dev; + hw->hw_addr = (void *)pci_dev->mem_resource[0].addr; + + igc_identify_hardware(eth_dev, pci_dev); + if (igc_setup_init_funcs(hw, FALSE) != IGC_SUCCESS) { + error = -EIO; + goto err_late; + } + + igc_get_bus_info(hw); + + /* Reset any pending lock */ + if (igc_reset_swfw_lock(hw) != IGC_SUCCESS) { + error = -EIO; + goto err_late; + } + + /* Finish initialization */ + if (igc_setup_init_funcs(hw, TRUE) != IGC_SUCCESS) { + error = -EIO; + goto err_late; + } + + hw->mac.autoneg = 1; + hw->phy.autoneg_wait_to_complete = 0; + hw->phy.autoneg_advertised = IGC_ALL_SPEED_DUPLEX_2500; + + /* Copper options */ + if (hw->phy.media_type == igc_media_type_copper) { + hw->phy.mdix = 0; /* AUTO_ALL_MODES */ + hw->phy.disable_polarity_correction = 0; + hw->phy.ms_type = igc_ms_hw_default; + } + + /* + * Start from a known state, this is important in reading the nvm + * and mac from that. + */ + igc_reset_hw(hw); + + /* Make sure we have a good EEPROM before we read from it */ + if (igc_validate_nvm_checksum(hw) < 0) { + /* + * Some PCI-E parts fail the first check due to + * the link being in sleep state, call it again, + * if it fails a second time its a real issue. 
+ */ + if (igc_validate_nvm_checksum(hw) < 0) { + PMD_INIT_LOG(ERR, "EEPROM checksum invalid"); + error = -EIO; + goto err_late; + } + } + + /* Read the permanent MAC address out of the EEPROM */ + if (igc_read_mac_addr(hw) != 0) { + PMD_INIT_LOG(ERR, "EEPROM error while reading MAC address"); + error = -EIO; + goto err_late; + } + + /* Allocate memory for storing MAC addresses */ + eth_dev->data->mac_addrs = rte_zmalloc("e1000", + RTE_ETHER_ADDR_LEN * hw->mac.rar_entry_count, 0); + if (eth_dev->data->mac_addrs == NULL) { + PMD_INIT_LOG(ERR, "Failed to allocate %d bytes needed to " + "store MAC addresses", + RTE_ETHER_ADDR_LEN * hw->mac.rar_entry_count); + error = -ENOMEM; + goto err_late; + } + + /* Copy the permanent MAC address */ + rte_ether_addr_copy((struct rte_ether_addr *)hw->mac.addr, + ð_dev->data->mac_addrs[0]); + + /* Now initialize the hardware */ + if (igc_hardware_init(hw) != 0) { + PMD_INIT_LOG(ERR, "Hardware initialization failed"); + rte_free(eth_dev->data->mac_addrs); + eth_dev->data->mac_addrs = NULL; + error = -ENODEV; + goto err_late; + } + + /* Pass the information to the rte_eth_dev_close() that it should also + * release the private port resources. + */ + eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; + + hw->mac.get_link_status = 1; + adapter->stopped = 0; + + /* Indicate SOL/IDER usage */ + if (igc_check_reset_block(hw) < 0) + PMD_INIT_LOG(ERR, "PHY reset is blocked due to" + " SOL/IDER session."); + + PMD_INIT_LOG(DEBUG, "port_id %d vendorID=0x%x deviceID=0x%x", + eth_dev->data->port_id, pci_dev->id.vendor_id, + pci_dev->id.device_id); + + rte_intr_callback_register(&pci_dev->intr_handle, + eth_igc_interrupt_handler, (void *)eth_dev); + + /* enable uio/vfio intr/eventfd mapping */ + rte_intr_enable(&pci_dev->intr_handle); + + /* enable support intr */ + igc_intr_other_enable(eth_dev); + return 0; + +err_late: + igc_hw_control_release(hw); + return error; +} + +static int +eth_igc_dev_uninit(__rte_unused struct rte_eth_dev *eth_dev) +{ + PMD_INIT_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return -EPERM; + + eth_igc_close(eth_dev); + return 0; +} + +/* + * Reset PF device. 
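+ * The port is fully uninitialized and then re-initialized, so the
+ * application has to reconfigure and restart it after the reset.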
+ */ +static int +eth_igc_reset(struct rte_eth_dev *dev) +{ + int ret; + + ret = eth_igc_dev_uninit(dev); + if (ret) + return ret; + + ret = eth_igc_dev_init(dev); + + return ret; +} + +static int +eth_igc_promiscuous_enable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t rctl; + + rctl = IGC_READ_REG(hw, IGC_RCTL); + rctl |= (IGC_RCTL_UPE | IGC_RCTL_MPE); + IGC_WRITE_REG(hw, IGC_RCTL, rctl); + return 0; +} + +static int +eth_igc_promiscuous_disable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t rctl; + + rctl = IGC_READ_REG(hw, IGC_RCTL); + rctl &= (~IGC_RCTL_UPE); + if (dev->data->all_multicast == 1) + rctl |= IGC_RCTL_MPE; + else + rctl &= (~IGC_RCTL_MPE); + IGC_WRITE_REG(hw, IGC_RCTL, rctl); + return 0; +} + +static int +eth_igc_allmulticast_enable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t rctl; + + rctl = IGC_READ_REG(hw, IGC_RCTL); + rctl |= IGC_RCTL_MPE; + IGC_WRITE_REG(hw, IGC_RCTL, rctl); + return 0; +} + +static int +eth_igc_allmulticast_disable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t rctl; + + if (dev->data->promiscuous == 1) + return 0; /* must remain in all_multicast mode */ + + rctl = IGC_READ_REG(hw, IGC_RCTL); + rctl &= (~IGC_RCTL_MPE); + IGC_WRITE_REG(hw, IGC_RCTL, rctl); + return 0; +} + +static void +igc_read_stats_registers(struct igc_hw *hw, struct igc_hw_stats *stats) +{ + int pause_frames; + + uint64_t old_gprc = stats->gprc; + uint64_t old_gptc = stats->gptc; + uint64_t old_tpr = stats->tpr; + uint64_t old_tpt = stats->tpt; + uint64_t old_rpthc = stats->rpthc; + uint64_t old_hgptc = stats->hgptc; + + stats->crcerrs += IGC_READ_REG(hw, IGC_CRCERRS); + stats->algnerrc += IGC_READ_REG(hw, IGC_ALGNERRC); + stats->rxerrc += IGC_READ_REG(hw, IGC_RXERRC); + stats->mpc += IGC_READ_REG(hw, IGC_MPC); + stats->scc += IGC_READ_REG(hw, IGC_SCC); + stats->ecol += IGC_READ_REG(hw, IGC_ECOL); + + stats->mcc += IGC_READ_REG(hw, IGC_MCC); + stats->latecol += IGC_READ_REG(hw, IGC_LATECOL); + stats->colc += IGC_READ_REG(hw, IGC_COLC); + + stats->dc += IGC_READ_REG(hw, IGC_DC); + stats->tncrs += IGC_READ_REG(hw, IGC_TNCRS); + stats->htdpmc += IGC_READ_REG(hw, IGC_HTDPMC); + stats->rlec += IGC_READ_REG(hw, IGC_RLEC); + stats->xonrxc += IGC_READ_REG(hw, IGC_XONRXC); + stats->xontxc += IGC_READ_REG(hw, IGC_XONTXC); + + /* + * For watchdog management we need to know if we have been + * paused during the last interval, so capture that here. + */ + pause_frames = IGC_READ_REG(hw, IGC_XOFFRXC); + stats->xoffrxc += pause_frames; + stats->xofftxc += IGC_READ_REG(hw, IGC_XOFFTXC); + stats->fcruc += IGC_READ_REG(hw, IGC_FCRUC); + stats->prc64 += IGC_READ_REG(hw, IGC_PRC64); + stats->prc127 += IGC_READ_REG(hw, IGC_PRC127); + stats->prc255 += IGC_READ_REG(hw, IGC_PRC255); + stats->prc511 += IGC_READ_REG(hw, IGC_PRC511); + stats->prc1023 += IGC_READ_REG(hw, IGC_PRC1023); + stats->prc1522 += IGC_READ_REG(hw, IGC_PRC1522); + stats->gprc += IGC_READ_REG(hw, IGC_GPRC); + stats->bprc += IGC_READ_REG(hw, IGC_BPRC); + stats->mprc += IGC_READ_REG(hw, IGC_MPRC); + stats->gptc += IGC_READ_REG(hw, IGC_GPTC); + + /* For the 64-bit byte counters the low dword must be read first. 
*/ + /* Both registers clear on the read of the high dword */ + + /* Workaround CRC bytes included in size, take away 4 bytes/packet */ + stats->gorc += IGC_READ_REG(hw, IGC_GORCL); + stats->gorc += ((uint64_t)IGC_READ_REG(hw, IGC_GORCH) << 32); + stats->gorc -= (stats->gprc - old_gprc) * RTE_ETHER_CRC_LEN; + stats->gotc += IGC_READ_REG(hw, IGC_GOTCL); + stats->gotc += ((uint64_t)IGC_READ_REG(hw, IGC_GOTCH) << 32); + stats->gotc -= (stats->gptc - old_gptc) * RTE_ETHER_CRC_LEN; + + stats->rnbc += IGC_READ_REG(hw, IGC_RNBC); + stats->ruc += IGC_READ_REG(hw, IGC_RUC); + stats->rfc += IGC_READ_REG(hw, IGC_RFC); + stats->roc += IGC_READ_REG(hw, IGC_ROC); + stats->rjc += IGC_READ_REG(hw, IGC_RJC); + + stats->mgprc += IGC_READ_REG(hw, IGC_MGTPRC); + stats->mgpdc += IGC_READ_REG(hw, IGC_MGTPDC); + stats->mgptc += IGC_READ_REG(hw, IGC_MGTPTC); + stats->b2ospc += IGC_READ_REG(hw, IGC_B2OSPC); + stats->b2ogprc += IGC_READ_REG(hw, IGC_B2OGPRC); + stats->o2bgptc += IGC_READ_REG(hw, IGC_O2BGPTC); + stats->o2bspc += IGC_READ_REG(hw, IGC_O2BSPC); + + stats->tpr += IGC_READ_REG(hw, IGC_TPR); + stats->tpt += IGC_READ_REG(hw, IGC_TPT); + + stats->tor += IGC_READ_REG(hw, IGC_TORL); + stats->tor += ((uint64_t)IGC_READ_REG(hw, IGC_TORH) << 32); + stats->tor -= (stats->tpr - old_tpr) * RTE_ETHER_CRC_LEN; + stats->tot += IGC_READ_REG(hw, IGC_TOTL); + stats->tot += ((uint64_t)IGC_READ_REG(hw, IGC_TOTH) << 32); + stats->tot -= (stats->tpt - old_tpt) * RTE_ETHER_CRC_LEN; + + stats->ptc64 += IGC_READ_REG(hw, IGC_PTC64); + stats->ptc127 += IGC_READ_REG(hw, IGC_PTC127); + stats->ptc255 += IGC_READ_REG(hw, IGC_PTC255); + stats->ptc511 += IGC_READ_REG(hw, IGC_PTC511); + stats->ptc1023 += IGC_READ_REG(hw, IGC_PTC1023); + stats->ptc1522 += IGC_READ_REG(hw, IGC_PTC1522); + stats->mptc += IGC_READ_REG(hw, IGC_MPTC); + stats->bptc += IGC_READ_REG(hw, IGC_BPTC); + stats->tsctc += IGC_READ_REG(hw, IGC_TSCTC); + + stats->iac += IGC_READ_REG(hw, IGC_IAC); + stats->rpthc += IGC_READ_REG(hw, IGC_RPTHC); + stats->hgptc += IGC_READ_REG(hw, IGC_HGPTC); + stats->icrxdmtc += IGC_READ_REG(hw, IGC_ICRXDMTC); + + /* Host to Card Statistics */ + stats->hgorc += IGC_READ_REG(hw, IGC_HGORCL); + stats->hgorc += ((uint64_t)IGC_READ_REG(hw, IGC_HGORCH) << 32); + stats->hgorc -= (stats->rpthc - old_rpthc) * RTE_ETHER_CRC_LEN; + stats->hgotc += IGC_READ_REG(hw, IGC_HGOTCL); + stats->hgotc += ((uint64_t)IGC_READ_REG(hw, IGC_HGOTCH) << 32); + stats->hgotc -= (stats->hgptc - old_hgptc) * RTE_ETHER_CRC_LEN; + stats->lenerrs += IGC_READ_REG(hw, IGC_LENERRS); +} + +/* + * Write 0 to all queue status registers + */ +static void +igc_reset_queue_stats_register(struct igc_hw *hw) +{ + int i; + + for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) { + IGC_WRITE_REG(hw, IGC_PQGPRC(i), 0); + IGC_WRITE_REG(hw, IGC_PQGPTC(i), 0); + IGC_WRITE_REG(hw, IGC_PQGORC(i), 0); + IGC_WRITE_REG(hw, IGC_PQGOTC(i), 0); + IGC_WRITE_REG(hw, IGC_PQMPRC(i), 0); + IGC_WRITE_REG(hw, IGC_RQDPC(i), 0); + IGC_WRITE_REG(hw, IGC_TQDPC(i), 0); + } +} + +static void +igc_read_queue_stats_register(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct igc_hw_queue_stats *queue_stats = + IGC_DEV_PRIVATE_TO_QUEUE_STATS(dev->data->dev_private); + int i; + + /* + * This register is not cleared on read. Furthermore, the register wraps + * around back to 0x00000000 on the next increment when reaching a value + * of 0xFFFFFFFF and then continues normal count operation. 
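+ * For example, if the accumulated 64-bit value is 0x00000001fffffff0 and
+ * the register now reads 0x00000010, the low dword has wrapped, so the
+ * high dword is incremented and the result becomes 0x0000000200000010.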
+ */ + for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) { + union { + u64 ddword; + u32 dword[2]; + } value; + u32 tmp; + + /* + * Read the register first, if the value is smaller than that + * previous read, that mean the register has been overflowed, + * then we add the high 4 bytes by 1 and replace the low 4 + * bytes by the new value. + */ + tmp = IGC_READ_REG(hw, IGC_PQGPRC(i)); + value.ddword = queue_stats->pqgprc[i]; + if (value.dword[U32_0_IN_U64] > tmp) + value.dword[U32_1_IN_U64]++; + value.dword[U32_0_IN_U64] = tmp; + queue_stats->pqgprc[i] = value.ddword; + + tmp = IGC_READ_REG(hw, IGC_PQGPTC(i)); + value.ddword = queue_stats->pqgptc[i]; + if (value.dword[U32_0_IN_U64] > tmp) + value.dword[U32_1_IN_U64]++; + value.dword[U32_0_IN_U64] = tmp; + queue_stats->pqgptc[i] = value.ddword; + + tmp = IGC_READ_REG(hw, IGC_PQGORC(i)); + value.ddword = queue_stats->pqgorc[i]; + if (value.dword[U32_0_IN_U64] > tmp) + value.dword[U32_1_IN_U64]++; + value.dword[U32_0_IN_U64] = tmp; + queue_stats->pqgorc[i] = value.ddword; + + tmp = IGC_READ_REG(hw, IGC_PQGOTC(i)); + value.ddword = queue_stats->pqgotc[i]; + if (value.dword[U32_0_IN_U64] > tmp) + value.dword[U32_1_IN_U64]++; + value.dword[U32_0_IN_U64] = tmp; + queue_stats->pqgotc[i] = value.ddword; + + tmp = IGC_READ_REG(hw, IGC_PQMPRC(i)); + value.ddword = queue_stats->pqmprc[i]; + if (value.dword[U32_0_IN_U64] > tmp) + value.dword[U32_1_IN_U64]++; + value.dword[U32_0_IN_U64] = tmp; + queue_stats->pqmprc[i] = value.ddword; + + tmp = IGC_READ_REG(hw, IGC_RQDPC(i)); + value.ddword = queue_stats->rqdpc[i]; + if (value.dword[U32_0_IN_U64] > tmp) + value.dword[U32_1_IN_U64]++; + value.dword[U32_0_IN_U64] = tmp; + queue_stats->rqdpc[i] = value.ddword; + + tmp = IGC_READ_REG(hw, IGC_TQDPC(i)); + value.ddword = queue_stats->tqdpc[i]; + if (value.dword[U32_0_IN_U64] > tmp) + value.dword[U32_1_IN_U64]++; + value.dword[U32_0_IN_U64] = tmp; + queue_stats->tqdpc[i] = value.ddword; + } +} + +static int +eth_igc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct igc_hw_stats *stats = + IGC_DEV_PRIVATE_TO_STATS(dev->data->dev_private); + struct igc_hw_queue_stats *queue_stats = + IGC_DEV_PRIVATE_TO_QUEUE_STATS(dev->data->dev_private); + int i; + + /* + * Cancel status handler since it will read the queue status registers + */ + rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev); + + /* Read status register */ + igc_read_queue_stats_register(dev); + igc_read_stats_registers(hw, stats); + + if (rte_stats == NULL) { + /* Restart queue status handler */ + rte_eal_alarm_set(IGC_ALARM_INTERVAL, + igc_update_queue_stats_handler, dev); + return -EINVAL; + } + + /* Rx Errors */ + rte_stats->imissed = stats->mpc; + rte_stats->ierrors = stats->crcerrs + + stats->rlec + stats->ruc + stats->roc + + stats->rxerrc + stats->algnerrc; + + /* Tx Errors */ + rte_stats->oerrors = stats->ecol + stats->latecol; + + rte_stats->ipackets = stats->gprc; + rte_stats->opackets = stats->gptc; + rte_stats->ibytes = stats->gorc; + rte_stats->obytes = stats->gotc; + + RTE_BUILD_BUG_ON(RTE_ETHDEV_QUEUE_STAT_CNTRS < IGC_QUEUE_PAIRS_NUM); + + for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) { + rte_stats->q_ipackets[i] = queue_stats->pqgprc[i]; + rte_stats->q_opackets[i] = queue_stats->pqgptc[i]; + rte_stats->q_ibytes[i] = queue_stats->pqgorc[i]; + rte_stats->q_obytes[i] = queue_stats->pqgotc[i]; + rte_stats->q_errors[i] = queue_stats->rqdpc[i]; + } + + /* Restart queue status handler */ + 
rte_eal_alarm_set(IGC_ALARM_INTERVAL, + igc_update_queue_stats_handler, dev); + return 0; +} + +static int +eth_igc_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, + unsigned int n) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct igc_hw_stats *hw_stats = + IGC_DEV_PRIVATE_TO_STATS(dev->data->dev_private); + unsigned int i; + + igc_read_stats_registers(hw, hw_stats); + + if (n < IGC_NB_XSTATS) + return IGC_NB_XSTATS; + + /* If this is a reset xstats is NULL, and we have cleared the + * registers by reading them. + */ + if (!xstats) + return 0; + + /* Extended stats */ + for (i = 0; i < IGC_NB_XSTATS; i++) { + xstats[i].id = i; + xstats[i].value = *(uint64_t *)(((char *)hw_stats) + + rte_igc_stats_strings[i].offset); + } + + return IGC_NB_XSTATS; +} + +static int +eth_igc_xstats_reset(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct igc_hw_stats *hw_stats = + IGC_DEV_PRIVATE_TO_STATS(dev->data->dev_private); + struct igc_hw_queue_stats *queue_stats = + IGC_DEV_PRIVATE_TO_QUEUE_STATS(dev->data->dev_private); + + rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev); + + /* HW registers are cleared on read */ + igc_reset_queue_stats_register(hw); + igc_read_stats_registers(hw, hw_stats); + + /* Reset software totals */ + memset(hw_stats, 0, sizeof(*hw_stats)); + memset(queue_stats, 0, sizeof(*queue_stats)); + rte_eal_alarm_set(IGC_ALARM_INTERVAL, igc_update_queue_stats_handler, + dev); + + return 0; +} + +static int +eth_igc_xstats_get_names(__rte_unused struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, unsigned int size) +{ + unsigned int i; + + if (xstats_names == NULL) + return IGC_NB_XSTATS; + + if (size < IGC_NB_XSTATS) { + PMD_DRV_LOG(ERR, "not enough buffers!"); + return IGC_NB_XSTATS; + } + + for (i = 0; i < IGC_NB_XSTATS; i++) { + strlcpy(xstats_names[i].name, rte_igc_stats_strings[i].name, + sizeof(xstats_names[i].name)); + } + + return IGC_NB_XSTATS; +} + +static int +eth_igc_xstats_get_names_by_id(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, const uint64_t *ids, + unsigned int limit) +{ + unsigned int i; + + if (!ids) + return eth_igc_xstats_get_names(dev, xstats_names, limit); + + for (i = 0; i < limit; i++) { + if (ids[i] >= IGC_NB_XSTATS) { + PMD_INIT_LOG(ERR, "id value isn't valid"); + return -EINVAL; + } + strlcpy(xstats_names[i].name, + rte_igc_stats_strings[i].name, + sizeof(xstats_names[i].name)); + } + return limit; +} + +static int +eth_igc_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids, + uint64_t *values, unsigned int n) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct igc_hw_stats *hw_stats = + IGC_DEV_PRIVATE_TO_STATS(dev->data->dev_private); + unsigned int i; + + igc_read_stats_registers(hw, hw_stats); + + if (!ids) { + if (n < IGC_NB_XSTATS) + return IGC_NB_XSTATS; + + /* If this is a reset xstats is NULL, and we have cleared the + * registers by reading them. 
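
All of the xstats callbacks above resolve a counter the same way: add the offsetof() value stored in rte_igc_stats_strings to the base address of the stats structure and read a uint64_t there. A small self-contained sketch of that lookup pattern, using a made-up demo_stats structure rather than igc_hw_stats:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

struct demo_stats {
	uint64_t crcerrs;
	uint64_t mpc;
};

/* Name-to-offset table, in the same spirit as rte_igc_stats_strings. */
static const struct {
	const char *name;
	unsigned int offset;
} demo_strings[] = {
	{"rx_crc_errors", offsetof(struct demo_stats, crcerrs)},
	{"rx_missed_packets", offsetof(struct demo_stats, mpc)},
};

int main(void)
{
	struct demo_stats stats = { .crcerrs = 3, .mpc = 7 };
	size_t i;

	for (i = 0; i < sizeof(demo_strings) / sizeof(demo_strings[0]); i++) {
		uint64_t v = *(const uint64_t *)
			((const char *)&stats + demo_strings[i].offset);
		printf("%s = %" PRIu64 "\n", demo_strings[i].name, v);
	}
	return 0;
}
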
+ */ + if (!values) + return 0; + + /* Extended stats */ + for (i = 0; i < IGC_NB_XSTATS; i++) + values[i] = *(uint64_t *)(((char *)hw_stats) + + rte_igc_stats_strings[i].offset); + + return IGC_NB_XSTATS; + + } else { + for (i = 0; i < n; i++) { + if (ids[i] >= IGC_NB_XSTATS) { + PMD_INIT_LOG(ERR, "id value isn't valid"); + return -EINVAL; + } + values[i] = *(uint64_t *)(((char *)hw_stats) + + rte_igc_stats_strings[ids[i]].offset); + } + return n; + } +} + +static int +eth_igc_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev, + uint16_t queue_id, uint8_t stat_idx, __rte_unused uint8_t is_rx) +{ + if (queue_id == stat_idx) + return 0; + return -EIO; +} + +static int +eth_igc_fw_version_get(struct rte_eth_dev *dev, char *fw_version, + size_t fw_size) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct igc_fw_version fw; + int ret; + + igc_get_fw_version(hw, &fw); + + /* if option rom is valid, display its version too */ + if (fw.or_valid) { + ret = snprintf(fw_version, fw_size, + "%d.%d, 0x%08x, %d.%d.%d", + fw.eep_major, fw.eep_minor, fw.etrack_id, + fw.or_major, fw.or_build, fw.or_patch); + /* no option rom */ + } else { + if (fw.etrack_id != 0X0000) { + ret = snprintf(fw_version, fw_size, + "%d.%d, 0x%08x", + fw.eep_major, fw.eep_minor, + fw.etrack_id); + } else { + ret = snprintf(fw_version, fw_size, + "%d.%d.%d", + fw.eep_major, fw.eep_minor, + fw.eep_build); + } + } + + ret += 1; /* add the size of '\0' */ + if (fw_size < (u32)ret) + return ret; + else + return 0; +} + +static int +eth_igc_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */ + dev_info->max_rx_pktlen = 0x2600; /* See RLPML register. 
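
As the snprintf-style return above suggests, eth_igc_fw_version_get() follows the usual ethdev convention: it returns 0 when the string fits and otherwise the number of bytes (including the trailing '\0') the caller should have provided. A hedged usage sketch from the application side, assuming port_id refers to an initialized port:

#include <stdio.h>
#include <stdlib.h>
#include <rte_ethdev.h>

/* Query the firmware version, growing the buffer if the first try is short. */
static void
print_fw_version(uint16_t port_id)
{
	char buf[32];
	int ret = rte_eth_dev_fw_version_get(port_id, buf, sizeof(buf));

	if (ret > 0) {		/* buffer too small: ret is the needed size */
		char *big = malloc((size_t)ret);

		if (big != NULL &&
		    rte_eth_dev_fw_version_get(port_id, big, (size_t)ret) == 0)
			printf("fw: %s\n", big);
		free(big);
	} else if (ret == 0) {
		printf("fw: %s\n", buf);
	}
	/* negative values are -ENOTSUP/-ENODEV style errors */
}
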
*/ + dev_info->max_mac_addrs = hw->mac.rar_entry_count; + dev_info->rx_queue_offload_capa = IGC_RX_OFFLOAD_ALL; + dev_info->rx_offload_capa = dev_info->rx_queue_offload_capa; + dev_info->tx_queue_offload_capa = IGC_TX_OFFLOAD_ALL; + dev_info->tx_offload_capa = dev_info->tx_queue_offload_capa; + + dev_info->max_rx_queues = IGC_QUEUE_PAIRS_NUM; + dev_info->max_tx_queues = IGC_QUEUE_PAIRS_NUM; + dev_info->max_vmdq_pools = 0; + + dev_info->hash_key_size = IGC_HKEY_MAX_INDEX * sizeof(uint32_t); + dev_info->reta_size = ETH_RSS_RETA_SIZE_128; + dev_info->flow_type_rss_offloads = IGC_RSS_OFFLOAD_ALL; + + dev_info->default_rxconf = (struct rte_eth_rxconf) { + .rx_thresh = { + .pthresh = IGC_DEFAULT_RX_PTHRESH, + .hthresh = IGC_DEFAULT_RX_HTHRESH, + .wthresh = IGC_DEFAULT_RX_WTHRESH, + }, + .rx_free_thresh = IGC_DEFAULT_RX_FREE_THRESH, + .rx_drop_en = 0, + .offloads = 0, + }; + + dev_info->default_txconf = (struct rte_eth_txconf) { + .tx_thresh = { + .pthresh = IGC_DEFAULT_TX_PTHRESH, + .hthresh = IGC_DEFAULT_TX_HTHRESH, + .wthresh = IGC_DEFAULT_TX_WTHRESH, + }, + .offloads = 0, + }; + + dev_info->rx_desc_lim = rx_desc_lim; + dev_info->tx_desc_lim = tx_desc_lim; + + dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | + ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M | + ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G; + + dev_info->max_mtu = dev_info->max_rx_pktlen - IGC_ETH_OVERHEAD; + dev_info->min_mtu = RTE_ETHER_MIN_MTU; + + return 0; +} + +static const uint32_t * +eth_igc_supported_ptypes_get(__rte_unused struct rte_eth_dev *dev) +{ + static const uint32_t ptypes[] = { + /* refers to igb_rxd_pkt_info_to_pkt_type() */ + RTE_PTYPE_L2_ETHER, + RTE_PTYPE_L3_IPV4, + RTE_PTYPE_L3_IPV4_EXT, + RTE_PTYPE_L3_IPV6, + RTE_PTYPE_L3_IPV6_EXT, + RTE_PTYPE_L4_TCP, + RTE_PTYPE_L4_UDP, + RTE_PTYPE_L4_SCTP, + RTE_PTYPE_TUNNEL_IP, + RTE_PTYPE_INNER_L3_IPV6, + RTE_PTYPE_INNER_L3_IPV6_EXT, + RTE_PTYPE_INNER_L4_TCP, + RTE_PTYPE_INNER_L4_UDP, + RTE_PTYPE_UNKNOWN + }; + + return ptypes; +} + +static int +eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +{ + uint32_t rctl; + struct igc_hw *hw; + struct rte_eth_dev_info dev_info; + uint32_t frame_size = mtu + IGC_ETH_OVERHEAD; + + hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + eth_igc_infos_get(dev, &dev_info); + + /* check that mtu is within the allowed range */ + if (mtu < RTE_ETHER_MIN_MTU || + frame_size > dev_info.max_rx_pktlen) + return -EINVAL; + + /* + * refuse mtu that requires the support of scattered packets when + * this feature has not been enabled before. 
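
The MTU handling above converts between MTU and on-wire frame size through IGC_ETH_OVERHEAD, i.e. the Ethernet header, the CRC and one VLAN tag. A standalone arithmetic sketch of that conversion; the ETH_OVERHEAD macro below simply mirrors the driver's definition for illustration:

#include <stdio.h>

#define ETHER_HDR_LEN	14
#define ETHER_CRC_LEN	4
#define VLAN_TAG_LEN	4
/* Mirrors the patch's IGC_ETH_OVERHEAD: header + CRC + one VLAN tag. */
#define ETH_OVERHEAD	(ETHER_HDR_LEN + ETHER_CRC_LEN + VLAN_TAG_LEN)

int main(void)
{
	unsigned int max_rx_pktlen = 0x2600;	/* RLPML limit, 9728 bytes */
	unsigned int mtu = 9000;
	unsigned int frame_size = mtu + ETH_OVERHEAD;

	/* 9022 bytes on the wire; largest MTU the device accepts: 9706 */
	printf("frame_size=%u max_mtu=%u\n",
	       frame_size, max_rx_pktlen - ETH_OVERHEAD);
	return 0;
}
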
+ */ + if (!dev->data->scattered_rx && + frame_size > dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM) + return -EINVAL; + + rctl = IGC_READ_REG(hw, IGC_RCTL); + + /* switch to jumbo mode if needed */ + if (frame_size > RTE_ETHER_MAX_LEN) { + dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_JUMBO_FRAME; + rctl |= IGC_RCTL_LPE; + } else { + dev->data->dev_conf.rxmode.offloads &= + ~DEV_RX_OFFLOAD_JUMBO_FRAME; + rctl &= ~IGC_RCTL_LPE; + } + IGC_WRITE_REG(hw, IGC_RCTL, rctl); + + /* update max frame size */ + dev->data->dev_conf.rxmode.max_rx_pkt_len = frame_size; + + IGC_WRITE_REG(hw, IGC_RLPML, + dev->data->dev_conf.rxmode.max_rx_pkt_len); + + return 0; +} + +static int +eth_igc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + uint32_t vec = IGC_MISC_VEC_ID; + + if (rte_intr_allow_others(intr_handle)) + vec = IGC_RX_VEC_START; + + uint32_t mask = 1 << (queue_id + vec); + + IGC_WRITE_REG(hw, IGC_EIMC, mask); + IGC_WRITE_FLUSH(hw); + + return 0; +} + +static int +eth_igc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + uint32_t vec = IGC_MISC_VEC_ID; + + if (rte_intr_allow_others(intr_handle)) + vec = IGC_RX_VEC_START; + + uint32_t mask = 1 << (queue_id + vec); + + IGC_WRITE_REG(hw, IGC_EIMS, mask); + IGC_WRITE_FLUSH(hw); + + rte_intr_enable(intr_handle); + + return 0; +} + +static int +eth_igc_led_on(struct rte_eth_dev *dev) +{ + struct igc_hw *hw; + + hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + return igc_led_on(hw) == IGC_SUCCESS ? 0 : -ENOTSUP; +} + +static int +eth_igc_led_off(struct rte_eth_dev *dev) +{ + struct igc_hw *hw; + + hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + return igc_led_off(hw) == IGC_SUCCESS ? 0 : -ENOTSUP; +} + +static int +eth_igc_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) +{ + struct igc_hw *hw; + uint32_t ctrl; + int tx_pause; + int rx_pause; + + hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + fc_conf->pause_time = hw->fc.pause_time; + fc_conf->high_water = hw->fc.high_water; + fc_conf->low_water = hw->fc.low_water; + fc_conf->send_xon = hw->fc.send_xon; + fc_conf->autoneg = hw->mac.autoneg; + + /* + * Return rx_pause and tx_pause status according to actual setting of + * the TFCE and RFCE bits in the CTRL register. 
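
The RX interrupt enable/disable helpers above compute the EIMS/EIMC bit from the queue's MSI-X vector: vector 0 stays reserved for link and other interrupts when the vector table is shared, so RX queue vectors start one position later. A small sketch of that mask computation; rx_queue_intr_mask() is an illustrative name, not a driver function:

#include <stdint.h>
#include <stdio.h>

/* Bit to set in EIMS/EIMC for a given RX queue. */
static uint32_t
rx_queue_intr_mask(uint16_t queue_id, int allow_others)
{
	/*
	 * When "other" interrupts share the table, vector 0 carries them
	 * and RX queue vectors start at 1 (RTE_INTR_VEC_RXTX_OFFSET).
	 */
	uint32_t vec = allow_others ? 1 : 0;

	return (uint32_t)1 << (queue_id + vec);
}

int main(void)
{
	printf("queue 2 mask = 0x%x\n",
	       (unsigned int)rx_queue_intr_mask(2, 1));	/* prints 0x8 */
	return 0;
}
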
+ */ + ctrl = IGC_READ_REG(hw, IGC_CTRL); + if (ctrl & IGC_CTRL_TFCE) + tx_pause = 1; + else + tx_pause = 0; + + if (ctrl & IGC_CTRL_RFCE) + rx_pause = 1; + else + rx_pause = 0; + + if (rx_pause && tx_pause) + fc_conf->mode = RTE_FC_FULL; + else if (rx_pause) + fc_conf->mode = RTE_FC_RX_PAUSE; + else if (tx_pause) + fc_conf->mode = RTE_FC_TX_PAUSE; + else + fc_conf->mode = RTE_FC_NONE; + + return 0; +} + +static int +eth_igc_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + enum igc_fc_mode rte_fcmode_2_igc_fcmode[] = { + igc_fc_none, + igc_fc_rx_pause, + igc_fc_tx_pause, + igc_fc_full + }; + uint32_t rx_buf_size; + uint32_t max_high_water; + uint32_t rctl; + int err; + + if (fc_conf->autoneg != hw->mac.autoneg) + return -ENOTSUP; + + rx_buf_size = igc_get_rx_buffer_size(hw); + PMD_INIT_LOG(DEBUG, "Rx packet buffer size = 0x%x", rx_buf_size); + + /* At least reserve one Ethernet frame for watermark */ + max_high_water = rx_buf_size - RTE_ETHER_MAX_LEN; + if (fc_conf->high_water > max_high_water || + fc_conf->high_water < fc_conf->low_water) { + PMD_INIT_LOG(ERR, "e1000 incorrect high/low water value"); + PMD_INIT_LOG(ERR, "high water must <= 0x%x", max_high_water); + return -EINVAL; + } + + hw->fc.requested_mode = rte_fcmode_2_igc_fcmode[fc_conf->mode]; + hw->fc.pause_time = fc_conf->pause_time; + hw->fc.high_water = fc_conf->high_water; + hw->fc.low_water = fc_conf->low_water; + hw->fc.send_xon = fc_conf->send_xon; + + err = igc_setup_link_generic(hw); + if (err == IGC_SUCCESS) { + /** + * check if we want to forward MAC frames - driver doesn't have + * native capability to do that, so we'll write the registers + * ourselves + **/ + rctl = IGC_READ_REG(hw, IGC_RCTL); + + /* set or clear MFLCN.PMCF bit depending on configuration */ + if (fc_conf->mac_ctrl_frame_fwd != 0) + rctl |= IGC_RCTL_PMCF; + else + rctl &= ~IGC_RCTL_PMCF; + + IGC_WRITE_REG(hw, IGC_RCTL, rctl); + IGC_WRITE_FLUSH(hw); + + return 0; + } + + PMD_INIT_LOG(ERR, "igc_setup_link_generic = 0x%x", err); + return -EIO; +} + +static int +eth_igc_rar_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr, + uint32_t index, uint32_t pool) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + igc_rar_set(hw, mac_addr->addr_bytes, index); + RTE_SET_USED(pool); + return 0; +} + +static void +eth_igc_rar_clear(struct rte_eth_dev *dev, uint32_t index) +{ + uint8_t addr[RTE_ETHER_ADDR_LEN]; + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + memset(addr, 0, sizeof(addr)); + igc_rar_set(hw, addr, index); +} + +static int +eth_igc_default_mac_addr_set(struct rte_eth_dev *dev, + struct rte_ether_addr *addr) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + igc_rar_set(hw, addr->addr_bytes, 0); + return 0; +} + +static int +eth_igc_rss_reta_update(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + uint8_t i, j, mask; + uint16_t idx, shift; + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + if (reta_size != ETH_RSS_RETA_SIZE_128) { + PMD_DRV_LOG(ERR, "The size of hash lookup table configured " + "(%d) doesn't match the number hardware can supported " + "(%d)", reta_size, ETH_RSS_RETA_SIZE_128); + return -EINVAL; + } + + /* each register 4 entry */ + for (i = 0; i < reta_size; i += 4) { + union igc_reta { + uint32_t dword; + uint8_t bytes[4]; + } reta, r; + + idx = i / RTE_RETA_GROUP_SIZE; + shift = i % 
RTE_RETA_GROUP_SIZE; + mask = (uint8_t)((reta_conf[idx].mask >> shift) & 0xf); + if (!mask) + continue; + + if (mask == 0xf) + r.dword = 0; + else + r.dword = IGC_READ_REG_LE_VALUE(hw, + IGC_RETA(i >> 2)); + + for (j = 0; j < 4; j++) { + if (mask & (0x1 << j)) + reta.bytes[j] = + (uint8_t)reta_conf[idx].reta[shift + j]; + else + reta.bytes[j] = r.bytes[j]; + } + IGC_WRITE_REG_LE_VALUE(hw, IGC_RETA(i >> 2), reta.dword); + } + + return 0; +} + +static int +eth_igc_rss_reta_query(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + uint8_t i, j, mask; + uint16_t idx, shift; + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + if (reta_size != ETH_RSS_RETA_SIZE_128) { + PMD_DRV_LOG(ERR, "The size of hash lookup table configured " + "(%d) doesn't match the number hardware can supported " + "(%d)", reta_size, ETH_RSS_RETA_SIZE_128); + return -EINVAL; + } + + /* each register 4 entry */ + for (i = 0; i < reta_size; i += 4) { + union igc_reta { + uint32_t dword; + uint8_t bytes[4]; + } reta; + + idx = i / RTE_RETA_GROUP_SIZE; + shift = i % RTE_RETA_GROUP_SIZE; + mask = (uint8_t)((reta_conf[idx].mask >> shift) & 0xf); + if (!mask) + continue; + + reta.dword = IGC_READ_REG_LE_VALUE(hw, IGC_RETA(i >> 2)); + for (j = 0; j < 4; j++) { + if (mask & (0x1 << j)) + reta_conf[idx].reta[shift + j] = reta.bytes[j]; + } + } + + return 0; +} + +static int +eth_igc_rss_hash_update(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint64_t rss_hf = rss_conf->rss_hf & IGC_RSS_OFFLOAD_ALL; + uint32_t mrqc = IGC_READ_REG(hw, IGC_MRQC); + + /* + * Before changing anything, first check that the update RSS operation + * does not attempt to disable RSS, if RSS was enabled at + * initialization time, or does not attempt to enable RSS, if RSS was + * disabled at initialization time. 
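
From the application side, the 128-entry table that eth_igc_rss_reta_update() programs arrives as two rte_eth_rss_reta_entry64 groups with a validity mask per entry, indexed exactly as above (idx = i / RTE_RETA_GROUP_SIZE, shift = i % RTE_RETA_GROUP_SIZE). A hedged usage sketch that spreads all 128 entries round-robin over nb_q queues, assuming the port is already configured for RSS and nb_q is non-zero:

#include <string.h>
#include <rte_ethdev.h>

/* Distribute a 128-entry redirection table round-robin over nb_q queues. */
static int
spread_reta(uint16_t port_id, uint16_t nb_q)
{
	struct rte_eth_rss_reta_entry64 reta_conf[ETH_RSS_RETA_SIZE_128 /
						  RTE_RETA_GROUP_SIZE];
	uint16_t i;

	memset(reta_conf, 0, sizeof(reta_conf));
	for (i = 0; i < ETH_RSS_RETA_SIZE_128; i++) {
		uint16_t idx = i / RTE_RETA_GROUP_SIZE;
		uint16_t shift = i % RTE_RETA_GROUP_SIZE;

		reta_conf[idx].mask |= 1ULL << shift;	/* entry is valid */
		reta_conf[idx].reta[shift] = i % nb_q;	/* target queue */
	}
	return rte_eth_dev_rss_reta_update(port_id, reta_conf,
					   ETH_RSS_RETA_SIZE_128);
}
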
+ */ + if (!(mrqc & IGC_MRQC_ENABLE_MASK)) { /* RSS disabled */ + if (rss_hf != 0) + return -(EINVAL); + return 0; /* Nothing to do */ + } + + /* RSS enabled */ + if (rss_hf == 0) /* Disable RSS */ + return -(EINVAL); + igc_hw_rss_hash_set(hw, rss_conf); + return 0; +} + +static int +eth_igc_rss_hash_conf_get(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint8_t *hash_key = rss_conf->rss_key; + uint32_t mrqc; + uint64_t rss_hf; + + if (hash_key != NULL) { + int i; + for (i = 0; i < 10; i++) { + uint32_t rss_key = + IGC_READ_REG_ARRAY(hw, IGC_RSSRK(0), i); + hash_key[(i * 4)] = rss_key & 0x000000FF; + hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF; + hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF; + hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF; + } + } + + /* Get RSS functions configured in MRQC register */ + mrqc = IGC_READ_REG(hw, IGC_MRQC); + if ((mrqc & IGC_MRQC_ENABLE_RSS_4Q) == 0) { /* RSS is disabled */ + rss_conf->rss_hf = 0; + return 0; + } + + rss_hf = 0; + if (mrqc & IGC_MRQC_RSS_FIELD_IPV4) + rss_hf |= ETH_RSS_IPV4; + if (mrqc & IGC_MRQC_RSS_FIELD_IPV4_TCP) + rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP; + if (mrqc & IGC_MRQC_RSS_FIELD_IPV6) + rss_hf |= ETH_RSS_IPV6; + if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_EX) + rss_hf |= ETH_RSS_IPV6_EX; + if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_TCP) + rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP; + if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_TCP_EX) + rss_hf |= ETH_RSS_IPV6_TCP_EX; + if (mrqc & IGC_MRQC_RSS_FIELD_IPV4_UDP) + rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP; + if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_UDP) + rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP; + if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_UDP_EX) + rss_hf |= ETH_RSS_IPV6_UDP_EX; + + rss_conf->rss_hf = rss_hf; + return 0; +} + +static int +eth_igc_set_mc_addr_list(struct rte_eth_dev *dev, + struct rte_ether_addr *mc_addr_set, + uint32_t nb_mc_addr) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + igc_update_mc_addr_list(hw, (u8 *)mc_addr_set, nb_mc_addr); + return 0; +} + +static int +eth_igc_get_eeprom_length(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + /* Return unit is byte count */ + return hw->nvm.word_size * 2; +} + +static int +eth_igc_timesync_enable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t tsync_ctl; + uint32_t tsauxc; + uint16_t i; + + IGC_WRITE_REG(hw, IGC_TIMINCA, 0x0); + + /* enable SYSTIM 0. */ + tsauxc = IGC_READ_REG(hw, IGC_TSAUXC); + tsauxc &= ~IGC_DISABLE_TIMER0_MSK; + IGC_WRITE_REG(hw, IGC_TSAUXC, tsauxc); + + /* Enable L2 filtering of IEEE1588/802.1AS Ethernet frame types. */ + IGC_WRITE_REG(hw, IGC_ETQF(IGC_ETQF_FILTER_1588), + RTE_ETHER_TYPE_1588 | IGC_ETQF_FILTER_ENABLE | + IGC_ETQF_1588); + + for (i = 0; i < dev->data->nb_rx_queues; i++) + igc_enable_rx_queue_timestamp(dev, i); + + /* Enable timestamping of received all packets. */ + tsync_ctl = IGC_TSYNCRXCTL_ENABLED | IGC_TSYNCRXCTL_TYPE_ALL | + IGC_TSYNCRXCTL_SYNSIG_PHY; + IGC_WRITE_REG(hw, IGC_TSYNCRXCTL, tsync_ctl); + + /* Enable timestamping of transmitted PTP packets. 
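
The timesync callbacks being filled in here back the generic rte_eth_timesync_*() API. A usage sketch for reading back a TX timestamp, assuming the port is started, timestamping has been enabled and a PTP frame was sent with PKT_TX_IEEE1588_TMST set; the retry loop is only illustrative:

#include <stdio.h>
#include <time.h>
#include <rte_ethdev.h>

/* Enable PTP timestamping and poll for the last latched TX timestamp. */
static void
ptp_tx_timestamp_demo(uint16_t port_id)
{
	struct timespec ts;
	int retry = 100;

	if (rte_eth_timesync_enable(port_id) != 0)
		return;

	while (retry-- > 0) {
		if (rte_eth_timesync_read_tx_timestamp(port_id, &ts) == 0) {
			printf("tx stamp: %ld.%09ld\n",
			       (long)ts.tv_sec, ts.tv_nsec);
			return;
		}
	}
	printf("no TX timestamp latched\n");
}
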
*/ + tsync_ctl = IGC_READ_REG(hw, IGC_TSYNCTXCTL); + tsync_ctl |= IGC_TSYNCTXCTL_ENABLED | IGC_TSYNCTXCTL_TXSYNSIG; + IGC_WRITE_REG(hw, IGC_TSYNCTXCTL, tsync_ctl); + + return 0; +} + +static int +eth_igc_timesync_disable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t tsync_ctl; + + /* Disable timestamping of transmitted PTP packets. */ + tsync_ctl = IGC_READ_REG(hw, IGC_TSYNCTXCTL); + tsync_ctl &= ~IGC_TSYNCTXCTL_ENABLED; + IGC_WRITE_REG(hw, IGC_TSYNCTXCTL, tsync_ctl); + + /* Disable timestamping of received PTP packets. */ + tsync_ctl = IGC_READ_REG(hw, IGC_TSYNCRXCTL); + tsync_ctl &= ~IGC_TSYNCRXCTL_ENABLED; + IGC_WRITE_REG(hw, IGC_TSYNCRXCTL, tsync_ctl); + + /* Disable L2 filtering of IEEE1588/802.1AS Ethernet frame types. */ + IGC_WRITE_REG(hw, IGC_ETQF(IGC_ETQF_FILTER_1588), 0); + return 0; +} + +static int +eth_igc_timesync_read_time(struct rte_eth_dev *dev, struct timespec *ts) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + /* + * SYSTIMEL stores ns and SYSTIMEH stores seconds. + * Reading the SYSTIML register, the upper 32bits are latched to a + * SYSTIMH shadow register for coherent context + */ + ts->tv_nsec = IGC_READ_REG(hw, IGC_SYSTIML); + ts->tv_sec = IGC_READ_REG(hw, IGC_SYSTIMH); + return 0; +} + +/* + * Write register and wait for being auto-cleared + * @hw: hardware structure + * @adjust: register value(see register definition) + */ +static inline int +igc_timesync_adjust_reg(struct igc_hw *hw, uint32_t reg_val) +{ + const int try_max = 10000; + int i; + + IGC_WRITE_REG(hw, IGC_TIMADJL, reg_val); + + /* + * Every 3.25ns for 2.5G and 8ns 1G, the register + * TIMADJ will be auto-cleared by the hardware and + * the SYSTIM registers are updated. + */ + for (i = 0; i < try_max; i++) { + if (IGC_READ_REG(hw, IGC_TIMADJL) == 0) + break; + } + if (i >= try_max) { + PMD_DRV_LOG(DEBUG, "Adjust failed, may NIC stopped."); + return -EIO; + } + return 0; +} + +static int +eth_igc_timesync_adjust_time(struct rte_eth_dev *dev, int64_t delta) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t adjust_sign = 0; + + if (delta == 0) + return 0; + + if (IGC_READ_REG(hw, IGC_TIMADJL)) { + PMD_DRV_LOG(DEBUG, "Previous time adjusting not been" + " finished, try it latter."); + return -EAGAIN; + } + + if (delta < 0) { + delta = 0 - delta; + adjust_sign = IGC_TIMADJ_SIGN_MINUS; + } + + if (delta > IGC_TIMADJ_MAX) { + /* Adjust IGC_TIMADJ_MAX nano seconds each time. */ + uint32_t reg_val = IGC_TIMADJ_MAX | + IGC_TIMADJ_METH_SINGLE | adjust_sign; + + do { + int ret = igc_timesync_adjust_reg(hw, reg_val); + if (ret) + return ret; + delta -= IGC_TIMADJ_MAX; + } while (delta > IGC_TIMADJ_MAX); + } + + return igc_timesync_adjust_reg(hw, + (uint32_t)delta | IGC_TIMADJ_METH_SINGLE | adjust_sign); +} + +static int +eth_igc_timesync_write_time(struct rte_eth_dev *dev, const struct timespec *ts) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct timespec now; + + /* + * It makes sense to set only the SYSTIMH register (the sec units of + * the timer). Setting the sub sec units can be made by the + * "Time adjust" procedure. 
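
eth_igc_timesync_adjust_time() above can apply at most IGC_TIMADJ_MAX nanoseconds per TIMADJ write, so a large delta is broken into maximum-sized steps plus a remainder, with the sign handled separately. A standalone sketch of just that chunking; apply_step() is a stand-in for the register write and only prints:

#include <stdint.h>
#include <stdio.h>

#define TIMADJ_MAX	999999900u	/* max nanoseconds per adjustment */

/* Placeholder for the TIMADJ register write. */
static void
apply_step(uint32_t ns, int minus)
{
	printf("%s%u ns\n", minus ? "-" : "+", (unsigned int)ns);
}

static void
adjust_time(int64_t delta)
{
	int minus = 0;

	if (delta == 0)
		return;
	if (delta < 0) {
		minus = 1;
		delta = -delta;
	}
	while (delta > TIMADJ_MAX) {	/* full-sized steps first */
		apply_step(TIMADJ_MAX, minus);
		delta -= TIMADJ_MAX;
	}
	apply_step((uint32_t)delta, minus);	/* remainder */
}

int main(void)
{
	adjust_time(-2500000000LL);	/* -2.5 s, applied in three steps */
	return 0;
}
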
+ */ + IGC_WRITE_REG(hw, IGC_SYSTIMH, (uint32_t)ts->tv_sec); + eth_igc_timesync_read_time(dev, &now); + + return eth_igc_timesync_adjust_time(dev, + rte_timespec_to_ns(ts) - rte_timespec_to_ns(&now)); +} + +static int +eth_igc_timesync_read_tx_timestamp(struct rte_eth_dev *dev, + struct timespec *timestamp) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t tsync_txctl; + + tsync_txctl = IGC_READ_REG(hw, IGC_TSYNCTXCTL); + if ((tsync_txctl & IGC_TSYNCTXCTL_VALID) == 0) + return -EINVAL; + + /* RXSTMPL stores ns and RXSTMPH stores seconds. */ + timestamp->tv_nsec = IGC_READ_REG(hw, IGC_TXSTMPL); + timestamp->tv_sec = IGC_READ_REG(hw, IGC_TXSTMPH); + return 0; +} + +static int +eth_igc_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + struct rte_pci_device *pci_dev) +{ + return rte_eth_dev_pci_generic_probe(pci_dev, + sizeof(struct igc_adapter), eth_igc_dev_init); +} + +static int eth_igc_pci_remove(struct rte_pci_device *pci_dev __rte_unused) +{ + return rte_eth_dev_pci_generic_remove(pci_dev, eth_igc_dev_uninit); +} + +static struct rte_pci_driver rte_igc_pmd = { + .id_table = pci_id_igc_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_igc_pci_probe, + .remove = eth_igc_pci_remove, +}; + +RTE_PMD_REGISTER_PCI(net_igc, rte_igc_pmd); +RTE_PMD_REGISTER_PCI_TABLE(net_igc, pci_id_igc_map); +RTE_PMD_REGISTER_KMOD_DEP(net_igc, "* igb_uio | uio_pci_generic | vfio-pci"); diff --git a/drivers/net/igc/igc_ethdev.h b/drivers/net/igc/igc_ethdev.h new file mode 100644 index 0000000..7f836ed --- /dev/null +++ b/drivers/net/igc/igc_ethdev.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2020 Intel Corporation + */ + +#ifndef _IGC_ETHDEV_H_ +#define _IGC_ETHDEV_H_ + +#include +#include +#include +#include + +#include "base/e1000_osdep.h" +#include "base/e1000_hw.h" +#include "base/e1000_i225.h" +#include "base/e1000_api.h" + +#define IGC_INTEL_VENDOR_ID 0x8086 + +/* need update link, bit flag */ +#define IGC_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0) + +/* VLAN filter table size */ +#define IGC_VFTA_SIZE 128 + +#define IGC_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET +#define IGC_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET + +/* + * The overhead from MTU to max frame size. + * Considering VLAN so a tag needs to be counted. + */ +#define IGC_ETH_OVERHEAD (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + \ + VLAN_TAG_SIZE) + +#define IGC_ADVTXD_POPTS_TXSM 0x00000200 /* L4 Checksum offload request */ +#define IGC_ADVTXD_POPTS_IXSM 0x00000100 /* IP Checksum offload request */ +#define IGC_ADVTXD_TUCMD_L4T_RSV 0x00001800 /* L4 Packet TYPE of Reserved */ +#define IGC_CTRL_EXT_EXTEND_VLAN (1UL << 26)/* EXTENDED VLAN */ + +#define IGC_TSYNCRXCTL_SYNSIG_PHY (1UL << 10) +#define IGC_SRRCTL_TIMESTAMP_EN (1UL << 30) +#define IGC_SRRCTL_TIME1_SHIFT 14 +#define IGC_SRRCTL_TIME0_SHIFT 17 +#define IGC_SRRCTL_TIME1_MSK (3UL << IGC_SRRCTL_TIME1_SHIFT) +#define IGC_SRRCTL_TIME0_MSK (3UL << IGC_SRRCTL_TIME0_SHIFT) +#define IGC_RXD_STAT_TSIP (1UL << 15) + +/* + * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be + * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. + * This will also optimize cache line size effect. + * H/W supports up to cache line size 128. 
+ */ +#define IGC_ALIGN 128 + +#define IGC_TX_DESCRIPTOR_MULTIPLE 8 +#define IGC_RX_DESCRIPTOR_MULTIPLE 8 + +#define IGC_HKEY_MAX_INDEX 10 +#define IGC_QUEUE_PAIRS_NUM 4 + +#define IGC_RXD_ALIGN ((uint16_t)(IGC_ALIGN / \ + sizeof(union igc_adv_rx_desc))) +#define IGC_TXD_ALIGN ((uint16_t)(IGC_ALIGN / \ + sizeof(union igc_adv_tx_desc))) +#define IGC_MIN_TXD IGC_TX_DESCRIPTOR_MULTIPLE +#define IGC_MAX_TXD ((uint16_t)(0x80000 / sizeof(union igc_adv_tx_desc))) +#define IGC_MIN_RXD IGC_RX_DESCRIPTOR_MULTIPLE +#define IGC_MAX_RXD ((uint16_t)(0x80000 / sizeof(union igc_adv_rx_desc))) + +#define IGC_TX_MAX_SEG UINT8_MAX +#define IGC_TX_MAX_MTU_SEG UINT8_MAX + +#define IGC_RX_OFFLOAD_ALL \ + (DEV_RX_OFFLOAD_VLAN_STRIP | \ + DEV_RX_OFFLOAD_VLAN_FILTER | \ + DEV_RX_OFFLOAD_IPV4_CKSUM | \ + DEV_RX_OFFLOAD_UDP_CKSUM | \ + DEV_RX_OFFLOAD_TCP_CKSUM | \ + DEV_RX_OFFLOAD_JUMBO_FRAME | \ + DEV_RX_OFFLOAD_KEEP_CRC | \ + DEV_RX_OFFLOAD_SCATTER | \ + DEV_RX_OFFLOAD_TIMESTAMP | \ + DEV_RX_OFFLOAD_QINQ_STRIP) + +#define IGC_TX_OFFLOAD_ALL \ + (DEV_TX_OFFLOAD_VLAN_INSERT | \ + DEV_TX_OFFLOAD_IPV4_CKSUM | \ + DEV_TX_OFFLOAD_UDP_CKSUM | \ + DEV_TX_OFFLOAD_TCP_CKSUM | \ + DEV_TX_OFFLOAD_SCTP_CKSUM | \ + DEV_TX_OFFLOAD_TCP_TSO | \ + DEV_TX_OFFLOAD_MULTI_SEGS | \ + DEV_TX_OFFLOAD_QINQ_INSERT) + +#define IGC_RSS_OFFLOAD_ALL ( \ + ETH_RSS_IPV4 | \ + ETH_RSS_NONFRAG_IPV4_TCP | \ + ETH_RSS_NONFRAG_IPV4_UDP | \ + ETH_RSS_IPV6 | \ + ETH_RSS_NONFRAG_IPV6_TCP | \ + ETH_RSS_NONFRAG_IPV6_UDP | \ + ETH_RSS_IPV6_EX | \ + ETH_RSS_IPV6_TCP_EX | \ + ETH_RSS_IPV6_UDP_EX) + +#define IGC_ETQF_FILTER_1588 3 + +struct igc_rte_flow_rss_conf { + struct rte_flow_action_rss conf; /**< RSS parameters. */ + uint8_t key[IGC_HKEY_MAX_INDEX * sizeof(uint32_t)]; /* Hash key. */ + /* Queues indices to use. */ + uint16_t queue[IGC_QUEUE_PAIRS_NUM]; +}; + +/* Structure to per-queue statics */ +struct igc_hw_queue_stats { + u64 pqgprc[IGC_QUEUE_PAIRS_NUM]; + /* per queue good packets received count */ + u64 pqgptc[IGC_QUEUE_PAIRS_NUM]; + /* per queue good packets transmitted count */ + u64 pqgorc[IGC_QUEUE_PAIRS_NUM]; + /* per queue good octets received count */ + u64 pqgotc[IGC_QUEUE_PAIRS_NUM]; + /* per queue good octets transmitted count */ + u64 pqmprc[IGC_QUEUE_PAIRS_NUM]; + /* per queue multicast packets received count */ + u64 rqdpc[IGC_QUEUE_PAIRS_NUM]; + /* per receive queue drop packet count */ + u64 tqdpc[IGC_QUEUE_PAIRS_NUM]; + /* per transmit queue drop packet count */ +}; + +/* structure for interrupt relative data */ +struct igc_interrupt { + uint32_t flags; + uint32_t mask; +}; + +/* local vfta copy */ +struct igc_vfta { + uint32_t vfta[IGC_VFTA_SIZE]; +}; + +/* + * Structure to store private data for each driver instance (for each port). 
+ */ +struct igc_adapter { + struct igc_hw hw; + struct igc_hw_stats stats; + struct igc_hw_queue_stats queue_stats; + struct igc_interrupt intr; + struct igc_vfta shadow_vfta; + bool stopped; + struct rte_timecounter systime_tc; + struct rte_timecounter rx_tstamp_tc; + struct rte_timecounter tx_tstamp_tc; +}; + +#define IGC_DEV_PRIVATE(adapter) \ + ((struct igc_adapter *)adapter) + +#define IGC_DEV_PRIVATE_TO_HW(adapter) \ + (&((struct igc_adapter *)adapter)->hw) + +#define IGC_DEV_PRIVATE_TO_STATS(adapter) \ + (&((struct igc_adapter *)adapter)->stats) + +#define IGC_DEV_PRIVATE_TO_QUEUE_STATS(adapter) \ + (&((struct igc_adapter *)adapter)->queue_stats) + +#define IGC_DEV_PRIVATE_TO_INTR(adapter) \ + (&((struct igc_adapter *)adapter)->intr) + +#define IGC_DEV_PRIVATE_TO_VFTA(adapter) \ + (&((struct igc_adapter *)adapter)->shadow_vfta) + +#endif diff --git a/drivers/net/igc/igc_logs.c b/drivers/net/igc/igc_logs.c new file mode 100644 index 0000000..c653783 --- /dev/null +++ b/drivers/net/igc/igc_logs.c @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Intel Corporation + */ + +#include "igc_logs.h" +#include "rte_common.h" + +/* declared as extern in igc_logs.h */ +int igc_logtype_init = -1; +int igc_logtype_driver = -1; + +RTE_INIT(igc_init_log) +{ + igc_logtype_init = rte_log_register("pmd.net.igc.init"); + if (igc_logtype_init >= 0) + rte_log_set_level(igc_logtype_init, RTE_LOG_INFO); + + igc_logtype_driver = rte_log_register("pmd.net.igc.driver"); + if (igc_logtype_driver >= 0) + rte_log_set_level(igc_logtype_driver, RTE_LOG_INFO); +} diff --git a/drivers/net/igc/igc_logs.h b/drivers/net/igc/igc_logs.h new file mode 100644 index 0000000..632bf4a --- /dev/null +++ b/drivers/net/igc/igc_logs.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2020 Intel Corporation + */ + +#ifndef _IGC_LOGS_H_ +#define _IGC_LOGS_H_ + +#include + +extern int igc_logtype_init; +extern int igc_logtype_driver; + +#define PMD_INIT_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, igc_logtype_init, \ + "%s(): " fmt "\n", __func__, ##args) + +#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>") + +#ifdef RTE_LIBRTE_IGC_DEBUG_RX +#define PMD_RX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) +#else +#define PMD_RX_LOG(level, fmt, args...) do { } while (0) +#endif + +#ifdef RTE_LIBRTE_IGC_DEBUG_TX +#define PMD_TX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) +#else +#define PMD_TX_LOG(level, fmt, args...) do { } while (0) +#endif + +#ifdef RTE_LIBRTE_IGC_DEBUG_TX_FREE +#define PMD_TX_FREE_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) +#else +#define PMD_TX_FREE_LOG(level, fmt, args...) do { } while (0) +#endif + +extern int igc_logtype_driver; +#define PMD_DRV_LOG_RAW(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, igc_logtype_driver, "%s(): " fmt, \ + __func__, ## args) + +#define PMD_DRV_LOG(level, fmt, args...) 
\ + PMD_DRV_LOG_RAW(level, fmt "\n", ## args) + +#endif /* _IGC_LOGS_H_ */ diff --git a/drivers/net/igc/igc_txrx.c b/drivers/net/igc/igc_txrx.c new file mode 100644 index 0000000..2336443 --- /dev/null +++ b/drivers/net/igc/igc_txrx.c @@ -0,0 +1,2237 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2020 Intel Corporation + */ + +#include +#include +#include +#include + +#include "igc_logs.h" +#include "igc_txrx.h" + +#ifdef RTE_PMD_USE_PREFETCH +#define rte_igc_prefetch(p) rte_prefetch0(p) +#else +#define rte_igc_prefetch(p) do {} while (0) +#endif + +#ifdef RTE_PMD_PACKET_PREFETCH +#define rte_packet_prefetch(p) rte_prefetch1(p) +#else +#define rte_packet_prefetch(p) do {} while (0) +#endif + +/* Multicast / Unicast table offset mask. */ +#define IGC_RCTL_MO_MSK (3 << IGC_RCTL_MO_SHIFT) + +/* Loopback mode. */ +#define IGC_RCTL_LBM_SHIFT 6 +#define IGC_RCTL_LBM_MSK (3 << IGC_RCTL_LBM_SHIFT) + +/* Hash select for MTA */ +#define IGC_RCTL_HSEL_SHIFT 8 +#define IGC_RCTL_HSEL_MSK (3 << IGC_RCTL_HSEL_SHIFT) +#define IGC_RCTL_PSP (1 << 21) + +/* Receive buffer size for header buffer */ +#define IGC_SRRCTL_BSIZEHEADER_SHIFT 8 + +/* RX descriptor status and error flags */ +#define IGC_RXD_STAT_L4CS (1 << 5) +#define IGC_RXD_STAT_VEXT (1 << 9) +#define IGC_RXD_STAT_LLINT (1 << 11) +#define IGC_RXD_STAT_SCRC (1 << 12) +#define IGC_RXD_STAT_SMDT_MASK (3 << 13) +#define IGC_RXD_STAT_MC (1 << 19) +#define IGC_RXD_EXT_ERR_L4E (1 << 29) +#define IGC_RXD_EXT_ERR_IPE (1 << 30) +#define IGC_RXD_EXT_ERR_RXE (1 << 31) +#define IGC_RXD_RSS_TYPE_MASK 0xf +#define IGC_RXD_PCTYPE_MASK (0x7f << 4) +#define IGC_RXD_ETQF_SHIFT 12 +#define IGC_RXD_ETQF_MSK (0xfUL << IGC_RXD_ETQF_SHIFT) +#define IGC_RXD_VPKT (1 << 16) + +/* ETQF register index for 1588 */ +#define IGC_ETQF_FILTER_1588 3 + +/* TXD control bits */ +#define IGC_TXDCTL_PTHRESH_SHIFT 0 +#define IGC_TXDCTL_HTHRESH_SHIFT 8 +#define IGC_TXDCTL_WTHRESH_SHIFT 16 +#define IGC_TXDCTL_PTHRESH_MSK (0x1f << IGC_TXDCTL_PTHRESH_SHIFT) +#define IGC_TXDCTL_HTHRESH_MSK (0x1f << IGC_TXDCTL_HTHRESH_SHIFT) +#define IGC_TXDCTL_WTHRESH_MSK (0x1f << IGC_TXDCTL_WTHRESH_SHIFT) + +/* RXD control bits */ +#define IGC_RXDCTL_PTHRESH_SHIFT 0 +#define IGC_RXDCTL_HTHRESH_SHIFT 8 +#define IGC_RXDCTL_WTHRESH_SHIFT 16 +#define IGC_RXDCTL_PTHRESH_MSK (0x1f << IGC_RXDCTL_PTHRESH_SHIFT) +#define IGC_RXDCTL_HTHRESH_MSK (0x1f << IGC_RXDCTL_HTHRESH_SHIFT) +#define IGC_RXDCTL_WTHRESH_MSK (0x1f << IGC_RXDCTL_WTHRESH_SHIFT) + +#define IGC_TSO_MAX_HDRLEN 512 +#define IGC_TSO_MAX_MSS 9216 + +/* Header size for timestamp */ +#define IGC_TIMHDR_SIZE 16 + +/* Bit Mask to indicate what bits required for building TX context */ +#define IGC_TX_OFFLOAD_MASK ( \ + PKT_TX_OUTER_IPV6 | \ + PKT_TX_OUTER_IPV4 | \ + PKT_TX_IPV6 | \ + PKT_TX_IPV4 | \ + PKT_TX_VLAN_PKT | \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG | \ + PKT_TX_IEEE1588_TMST) + +#define IGC_TX_OFFLOAD_NOTSUP_MASK (PKT_TX_OFFLOAD_MASK ^ IGC_TX_OFFLOAD_MASK) + +/** + * Structure associated with each descriptor of the RX ring of a RX queue. + */ +struct igc_rx_entry { + struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */ +}; + +/** + * Structure associated with each RX queue. + */ +struct igc_rx_queue { + struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */ + volatile union igc_adv_rx_desc *rx_ring; + /**< RX ring virtual address. */ + uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */ + volatile uint32_t *rdt_reg_addr; /**< RDT register address. 
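
IGC_TX_OFFLOAD_NOTSUP_MASK above is built by XOR-ing the full mbuf TX offload mask with the subset this driver handles, which leaves exactly the unsupported bits set, so a single AND flags a bad request. A tiny standalone illustration of the trick; the flag values here are invented for the example and are not the real PKT_TX_* bits:

#include <stdint.h>
#include <stdio.h>

#define F_VLAN		(1ULL << 0)
#define F_IP_CKSUM	(1ULL << 1)
#define F_TCP_SEG	(1ULL << 2)
#define F_SEC_OFFLOAD	(1ULL << 3)	/* pretend this one is unsupported */

#define ALL_TX_OFFLOADS	(F_VLAN | F_IP_CKSUM | F_TCP_SEG | F_SEC_OFFLOAD)
#define SUPPORTED	(F_VLAN | F_IP_CKSUM | F_TCP_SEG)
#define NOTSUP_MASK	(ALL_TX_OFFLOADS ^ SUPPORTED)	/* = F_SEC_OFFLOAD */

int main(void)
{
	uint64_t ol_flags = F_VLAN | F_SEC_OFFLOAD;

	if (ol_flags & NOTSUP_MASK)
		printf("packet requests an unsupported TX offload\n");
	return 0;
}
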
*/ + volatile uint32_t *rdh_reg_addr; /**< RDH register address. */ + struct igc_rx_entry *sw_ring; /**< address of RX software ring. */ + struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */ + struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */ + uint16_t nb_rx_desc; /**< number of RX descriptors. */ + uint16_t rx_tail; /**< current value of RDT register. */ + uint16_t nb_rx_hold; /**< number of held free RX desc. */ + uint16_t rx_free_thresh; /**< max free RX desc to hold. */ + uint16_t queue_id; /**< RX queue index. */ + uint16_t reg_idx; /**< RX queue register index. */ + uint16_t port_id; /**< Device port identifier. */ + uint8_t pthresh; /**< Prefetch threshold register. */ + uint8_t hthresh; /**< Host threshold register. */ + uint8_t wthresh; /**< Write-back threshold register. */ + uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */ + uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */ + uint32_t flags; /**< RX flags. */ + uint64_t offloads; /**< offloads of DEV_RX_OFFLOAD_* */ + struct timespec timestamp[2]; /**< timestamp of last packet. */ +}; + +/** Offload features */ +union igc_tx_offload { + uint64_t data; + struct { + uint64_t l3_len:9; /**< L3 (IP) Header Length. */ + uint64_t l2_len:7; /**< L2 (MAC) Header Length. */ + uint64_t vlan_tci:16; + /**< VLAN Tag Control Identifier(CPU order). */ + uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */ + uint64_t tso_segsz:16; /**< TCP TSO segment size. */ + /* uint64_t unused:8; */ + }; +}; + +/* + * Compare mask for igb_tx_offload.data, + * should be in sync with igb_tx_offload layout. + */ +#define TX_MACIP_LEN_CMP_MASK 0x000000000000FFFFULL /**< L2L3 header mask. */ +#define TX_VLAN_CMP_MASK 0x00000000FFFF0000ULL /**< Vlan mask. */ +#define TX_TCP_LEN_CMP_MASK 0x000000FF00000000ULL /**< TCP header mask. */ +#define TX_TSO_MSS_CMP_MASK 0x00FFFF0000000000ULL /**< TSO segsz mask. */ +/** Mac + IP + TCP + Mss mask. */ +#define TX_TSO_CMP_MASK \ + (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK) + +/** + * Strucutre to check if new context need be built + */ +struct igc_advctx_info { + uint64_t flags; /**< ol_flags related to context build. */ + /** tx offload: vlan, tso, l2-l3-l4 lengths. */ + union igc_tx_offload tx_offload; + /** compare mask for tx offload. */ + union igc_tx_offload tx_offload_mask; +}; + +/** + * Hardware context number + */ +enum { + IGC_CTX_0 = 0, /**< CTX0 */ + IGC_CTX_1 = 1, /**< CTX1 */ + IGC_CTX_NUM = 2, /**< CTX_NUM */ +}; + +/** + * Structure associated with each descriptor of the TX ring of a TX queue. + */ +struct igc_tx_entry { + struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */ + uint16_t next_id; /**< Index of next descriptor in ring. */ + uint16_t last_id; /**< Index of last scattered descriptor. */ +}; + +/** + * Structure associated with each TX queue. + */ +struct igc_tx_queue { + volatile union igc_adv_tx_desc *tx_ring; /**< TX ring address */ + uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */ + struct igc_tx_entry *sw_ring; /**< virtual address of SW ring. */ + volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */ + uint32_t txd_type; /**< Device-specific TXD type */ + uint16_t nb_tx_desc; /**< number of TX descriptors. */ + uint16_t tx_tail; /**< Current value of TDT register. */ + uint16_t tx_head; + /**< Index of first used TX descriptor. */ + uint16_t queue_id; /**< TX queue index. */ + uint16_t reg_idx; /**< TX queue register index. 
*/ + uint16_t port_id; /**< Device port identifier. */ + uint8_t pthresh; /**< Prefetch threshold register. */ + uint8_t hthresh; /**< Host threshold register. */ + uint8_t wthresh; /**< Write-back threshold register. */ + uint8_t ctx_curr; + + /**< Start context position for transmit queue. */ + struct igc_advctx_info ctx_cache[IGC_CTX_NUM]; + /**< Hardware context history.*/ + uint64_t offloads; /**< offloads of DEV_TX_OFFLOAD_* */ +}; + +static inline void +get_rx_pkt_timestamp(struct igc_rx_queue *rxq, struct rte_mbuf *rxm) +{ + uint32_t *time = rte_pktmbuf_mtod(rxm, uint32_t *); + + /* + * Foxville supports adding an optional tailored header before + * the MAC header of the packet in the receive buffer. The 128 + * bit tailored header include a timestamp composed of the + * packet reception time measured in the SYSTIML (Low DW) and + * SYSTIMH (High DW) registers of two selected 1588 timers. + */ + rxq->timestamp[1].tv_nsec = rte_le_to_cpu_32(time[0]); + rxq->timestamp[1].tv_sec = rte_le_to_cpu_32(time[1]); + rxq->timestamp[0].tv_nsec = rte_le_to_cpu_32(time[2]); + rxq->timestamp[0].tv_sec = rte_le_to_cpu_32(time[3]); + rxm->timestamp = rte_timespec_to_ns(&rxq->timestamp[0]); + rxm->timesync = rxq->queue_id; + rxm->data_off += IGC_TIMHDR_SIZE; + rxm->data_len -= IGC_TIMHDR_SIZE; + rxm->pkt_len -= IGC_TIMHDR_SIZE; +} + +static inline uint64_t +rx_desc_statuserr_to_pkt_flags(uint32_t statuserr) +{ + static uint64_t l4_chksum_flags[] = {0, 0, PKT_RX_L4_CKSUM_GOOD, + PKT_RX_L4_CKSUM_BAD}; + + static uint64_t l3_chksum_flags[] = {0, 0, PKT_RX_IP_CKSUM_GOOD, + PKT_RX_IP_CKSUM_BAD}; + uint64_t pkt_flags = 0; + uint32_t tmp; + + if (statuserr & IGC_RXD_STAT_VP) + pkt_flags |= PKT_RX_VLAN_STRIPPED; + + tmp = !!(statuserr & (IGC_RXD_STAT_L4CS | IGC_RXD_STAT_UDPCS)); + tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_L4E); + pkt_flags |= l4_chksum_flags[tmp]; + + tmp = !!(statuserr & IGC_RXD_STAT_IPCS); + tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_IPE); + pkt_flags |= l3_chksum_flags[tmp]; + + return pkt_flags; +} + +#define IGC_PACKET_TYPE_IPV4 0X01 +#define IGC_PACKET_TYPE_IPV4_TCP 0X11 +#define IGC_PACKET_TYPE_IPV4_UDP 0X21 +#define IGC_PACKET_TYPE_IPV4_SCTP 0X41 +#define IGC_PACKET_TYPE_IPV4_EXT 0X03 +#define IGC_PACKET_TYPE_IPV4_EXT_SCTP 0X43 +#define IGC_PACKET_TYPE_IPV6 0X04 +#define IGC_PACKET_TYPE_IPV6_TCP 0X14 +#define IGC_PACKET_TYPE_IPV6_UDP 0X24 +#define IGC_PACKET_TYPE_IPV6_EXT 0X0C +#define IGC_PACKET_TYPE_IPV6_EXT_TCP 0X1C +#define IGC_PACKET_TYPE_IPV6_EXT_UDP 0X2C +#define IGC_PACKET_TYPE_IPV4_IPV6 0X05 +#define IGC_PACKET_TYPE_IPV4_IPV6_TCP 0X15 +#define IGC_PACKET_TYPE_IPV4_IPV6_UDP 0X25 +#define IGC_PACKET_TYPE_IPV4_IPV6_EXT 0X0D +#define IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D +#define IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D +#define IGC_PACKET_TYPE_MAX 0X80 +#define IGC_PACKET_TYPE_MASK 0X7F +#define IGC_PACKET_TYPE_SHIFT 0X04 + +static inline uint32_t +rx_desc_pkt_info_to_pkt_type(uint32_t pkt_info) +{ + static const uint32_t + ptype_table[IGC_PACKET_TYPE_MAX] __rte_cache_aligned = { + [IGC_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4, + [IGC_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4_EXT, + [IGC_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV6, + [IGC_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6, + [IGC_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV6_EXT, + [IGC_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER | 
+ RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT, + [IGC_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP, + [IGC_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP, + [IGC_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP, + [IGC_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP, + [IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP, + [IGC_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP, + [IGC_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP, + [IGC_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP, + [IGC_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP, + [IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP, + [IGC_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP, + [IGC_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP, + }; + if (unlikely(pkt_info & IGC_RXDADV_PKTTYPE_ETQF)) + return RTE_PTYPE_UNKNOWN; + + pkt_info = (pkt_info >> IGC_PACKET_TYPE_SHIFT) & IGC_PACKET_TYPE_MASK; + + return ptype_table[pkt_info]; +} + +static inline void +rx_desc_get_pkt_info(struct igc_rx_queue *rxq, struct rte_mbuf *rxm, + union igc_adv_rx_desc *rxd, uint32_t staterr) +{ + uint64_t pkt_flags; + uint32_t hlen_type_rss; + uint16_t pkt_info; + + /* Prefetch data of first segment, if configured to do so. */ + rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off); + + rxm->port = rxq->port_id; + hlen_type_rss = rte_le_to_cpu_32(rxd->wb.lower.lo_dword.data); + rxm->hash.rss = rte_le_to_cpu_32(rxd->wb.lower.hi_dword.rss); + rxm->vlan_tci = rte_le_to_cpu_16(rxd->wb.upper.vlan); + + pkt_flags = (hlen_type_rss & IGC_RXD_RSS_TYPE_MASK) ? 
+ PKT_RX_RSS_HASH : 0; + + if (hlen_type_rss & IGC_RXD_VPKT) + pkt_flags |= PKT_RX_VLAN; + + if ((hlen_type_rss & IGC_RXD_ETQF_MSK) == (IGC_RXDADV_PKTTYPE_ETQF | + (IGC_ETQF_FILTER_1588 << IGC_RXD_ETQF_SHIFT))) + pkt_flags |= PKT_RX_IEEE1588_PTP; + + pkt_flags |= rx_desc_statuserr_to_pkt_flags(staterr); + + if (unlikely(staterr & IGC_RXD_STAT_TSIP)) { + get_rx_pkt_timestamp(rxq, rxm); + pkt_flags |= PKT_RX_TIMESTAMP; + if (pkt_flags & PKT_RX_IEEE1588_PTP) + pkt_flags |= PKT_RX_IEEE1588_TMST; + } + + rxm->ol_flags = pkt_flags; + pkt_info = rte_le_to_cpu_16(rxd->wb.lower.lo_dword.hs_rss.pkt_info); + rxm->packet_type = rx_desc_pkt_info_to_pkt_type(pkt_info); +} + +static uint16_t +eth_igc_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +{ + struct igc_rx_queue * const rxq = rx_queue; + volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring; + struct igc_rx_entry * const sw_ring = rxq->sw_ring; + uint16_t rx_id = rxq->rx_tail; + uint16_t nb_rx = 0; + uint16_t nb_hold = 0; + + while (nb_rx < nb_pkts) { + volatile union igc_adv_rx_desc *rxdp; + struct igc_rx_entry *rxe; + struct rte_mbuf *rxm; + struct rte_mbuf *nmb; + union igc_adv_rx_desc rxd; + uint32_t staterr; + uint16_t data_len; + + /* + * The order of operations here is important as the DD status + * bit must not be read after any other descriptor fields. + * rx_ring and rxdp are pointing to volatile data so the order + * of accesses cannot be reordered by the compiler. If they were + * not volatile, they could be reordered which could lead to + * using invalid descriptor fields when read from rxd. + */ + rxdp = &rx_ring[rx_id]; + staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error); + if (!(staterr & IGC_RXD_STAT_DD)) + break; + rxd = *rxdp; + + /* + * End of packet. + * + * If the IGC_RXD_STAT_EOP flag is not set, the RX packet is + * likely to be invalid and to be dropped by the various + * validation checks performed by the network stack. + * + * Allocate a new mbuf to replenish the RX ring descriptor. + * If the allocation fails: + * - arrange for that RX descriptor to be the first one + * being parsed the next time the receive function is + * invoked [on the same queue]. + * + * - Stop parsing the RX ring and return immediately. + * + * This policy does not drop the packet received in the RX + * descriptor for which the allocation of a new mbuf failed. + * Thus, it allows that packet to be later retrieved if + * mbuf have been freed in the mean time. + * As a side effect, holding RX descriptors instead of + * systematically giving them back to the NIC may lead to + * RX ring exhaustion situations. + * However, the NIC can gracefully prevent such situations + * to happen by sending specific "back-pressure" flow control + * frames to its peer(s). + */ + PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u" + " staterr=0x%x data_len=%u", rxq->port_id, + rxq->queue_id, rx_id, staterr, + rte_le_to_cpu_16(rxd.wb.upper.length)); + + nmb = rte_mbuf_raw_alloc(rxq->mb_pool); + if (nmb == NULL) { + unsigned int id; + PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u" + " queue_id=%u", rxq->port_id, rxq->queue_id); + id = rxq->port_id; + rte_eth_devices[id].data->rx_mbuf_alloc_failed++; + break; + } + + nb_hold++; + rxe = &sw_ring[rx_id]; + rx_id++; + if (rx_id >= rxq->nb_rx_desc) + rx_id = 0; + + /* Prefetch next mbuf while processing current one. 
*/ + rte_igc_prefetch(sw_ring[rx_id].mbuf); + + /* + * When next RX descriptor is on a cache-line boundary, + * prefetch the next 4 RX descriptors and the next 8 pointers + * to mbufs. + */ + if ((rx_id & 0x3) == 0) { + rte_igc_prefetch(&rx_ring[rx_id]); + rte_igc_prefetch(&sw_ring[rx_id]); + } + + /* + * Update RX descriptor with the physical address of the new + * data buffer of the new allocated mbuf. + */ + rxm = rxe->mbuf; + rxe->mbuf = nmb; + rxdp->read.hdr_addr = 0; + rxdp->read.pkt_addr = + rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb)); + rxm->next = NULL; + + rxm->data_off = RTE_PKTMBUF_HEADROOM; + data_len = rte_le_to_cpu_16(rxd.wb.upper.length) - rxq->crc_len; + rxm->data_len = data_len; + rxm->pkt_len = data_len; + rxm->nb_segs = 1; + + rx_desc_get_pkt_info(rxq, rxm, &rxd, staterr); + + /* + * Store the mbuf address into the next entry of the array + * of returned packets. + */ + rx_pkts[nb_rx++] = rxm; + } + rxq->rx_tail = rx_id; + + /* + * If the number of free RX descriptors is greater than the RX free + * threshold of the queue, advance the Receive Descriptor Tail (RDT) + * register. + * Update the RDT with the value of the last processed RX descriptor + * minus 1, to guarantee that the RDT register is never equal to the + * RDH register, which creates a "full" ring situtation from the + * hardware point of view... + */ + nb_hold = nb_hold + rxq->nb_rx_hold; + if (nb_hold > rxq->rx_free_thresh) { + PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u" + " nb_hold=%u nb_rx=%u", rxq->port_id, rxq->queue_id, + rx_id, nb_hold, nb_rx); + rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1); + IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id); + nb_hold = 0; + } + rxq->nb_rx_hold = nb_hold; + return nb_rx; +} + +static uint16_t +eth_igc_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct igc_rx_queue * const rxq = rx_queue; + volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring; + struct igc_rx_entry * const sw_ring = rxq->sw_ring; + struct rte_mbuf *first_seg = rxq->pkt_first_seg; + struct rte_mbuf *last_seg = rxq->pkt_last_seg; + + uint16_t rx_id = rxq->rx_tail; + uint16_t nb_rx = 0; + uint16_t nb_hold = 0; + + while (nb_rx < nb_pkts) { + volatile union igc_adv_rx_desc *rxdp; + struct igc_rx_entry *rxe; + struct rte_mbuf *rxm; + struct rte_mbuf *nmb; + union igc_adv_rx_desc rxd; + uint32_t staterr; + uint16_t data_len; + +next_desc: + /* + * The order of operations here is important as the DD status + * bit must not be read after any other descriptor fields. + * rx_ring and rxdp are pointing to volatile data so the order + * of accesses cannot be reordered by the compiler. If they were + * not volatile, they could be reordered which could lead to + * using invalid descriptor fields when read from rxd. + */ + rxdp = &rx_ring[rx_id]; + staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error); + if (!(staterr & IGC_RXD_STAT_DD)) + break; + rxd = *rxdp; + + /* + * Descriptor done. + * + * Allocate a new mbuf to replenish the RX ring descriptor. + * If the allocation fails: + * - arrange for that RX descriptor to be the first one + * being parsed the next time the receive function is + * invoked [on the same queue]. + * + * - Stop parsing the RX ring and return immediately. + * + * This policy does not drop the packet received in the RX + * descriptor for which the allocation of a new mbuf failed. + * Thus, it allows that packet to be later retrieved if + * mbuf have been freed in the mean time. 
+ * As a side effect, holding RX descriptors instead of + * systematically giving them back to the NIC may lead to + * RX ring exhaustion situations. + * However, the NIC can gracefully prevent such situations + * to happen by sending specific "back-pressure" flow control + * frames to its peer(s). + */ + PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u" + " staterr=0x%x data_len=%u", rxq->port_id, + rxq->queue_id, rx_id, staterr, + rte_le_to_cpu_16(rxd.wb.upper.length)); + + nmb = rte_mbuf_raw_alloc(rxq->mb_pool); + if (nmb == NULL) { + unsigned int id; + PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u" + " queue_id=%u", rxq->port_id, rxq->queue_id); + id = rxq->port_id; + rte_eth_devices[id].data->rx_mbuf_alloc_failed++; + break; + } + + nb_hold++; + rxe = &sw_ring[rx_id]; + rx_id++; + if (rx_id >= rxq->nb_rx_desc) + rx_id = 0; + + /* Prefetch next mbuf while processing current one. */ + rte_igc_prefetch(sw_ring[rx_id].mbuf); + + /* + * When next RX descriptor is on a cache-line boundary, + * prefetch the next 4 RX descriptors and the next 8 pointers + * to mbufs. + */ + if ((rx_id & 0x3) == 0) { + rte_igc_prefetch(&rx_ring[rx_id]); + rte_igc_prefetch(&sw_ring[rx_id]); + } + + /* + * Update RX descriptor with the physical address of the new + * data buffer of the new allocated mbuf. + */ + rxm = rxe->mbuf; + rxe->mbuf = nmb; + rxdp->read.hdr_addr = 0; + rxdp->read.pkt_addr = + rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb)); + rxm->next = NULL; + + /* + * Set data length & data buffer address of mbuf. + */ + rxm->data_off = RTE_PKTMBUF_HEADROOM; + data_len = rte_le_to_cpu_16(rxd.wb.upper.length); + rxm->data_len = data_len; + + /* + * If this is the first buffer of the received packet, + * set the pointer to the first mbuf of the packet and + * initialize its context. + * Otherwise, update the total length and the number of segments + * of the current scattered packet, and update the pointer to + * the last mbuf of the current packet. + */ + if (first_seg == NULL) { + first_seg = rxm; + first_seg->pkt_len = data_len; + first_seg->nb_segs = 1; + } else { + first_seg->pkt_len += data_len; + first_seg->nb_segs++; + last_seg->next = rxm; + } + + /* + * If this is not the last buffer of the received packet, + * update the pointer to the last mbuf of the current scattered + * packet and continue to parse the RX ring. + */ + if (!(staterr & IGC_RXD_STAT_EOP)) { + last_seg = rxm; + goto next_desc; + } + + /* + * This is the last buffer of the received packet. + * If the CRC is not stripped by the hardware: + * - Subtract the CRC length from the total packet length. + * - If the last buffer only contains the whole CRC or a part + * of it, free the mbuf associated to the last buffer. + * If part of the CRC is also contained in the previous + * mbuf, subtract the length of that CRC part from the + * data length of the previous mbuf. + */ + if (unlikely(rxq->crc_len > 0)) { + first_seg->pkt_len -= RTE_ETHER_CRC_LEN; + if (data_len <= RTE_ETHER_CRC_LEN) { + rte_pktmbuf_free_seg(rxm); + first_seg->nb_segs--; + last_seg->data_len = last_seg->data_len - + (RTE_ETHER_CRC_LEN - data_len); + last_seg->next = NULL; + } else { + rxm->data_len = (uint16_t) + (data_len - RTE_ETHER_CRC_LEN); + } + } + + rx_desc_get_pkt_info(rxq, first_seg, &rxd, staterr); + + /* + * Store the mbuf address into the next entry of the array + * of returned packets. + */ + rx_pkts[nb_rx++] = first_seg; + + /* Setup receipt context for a new packet. 
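
When the hardware is configured to leave the CRC in place (DEV_RX_OFFLOAD_KEEP_CRC), the last buffer of a scattered packet may contain only part, or all, of the 4 CRC bytes, so the code above trims the reported lengths across the final two segments. A plain-C sketch of just that length arithmetic, using a toy segment struct rather than rte_mbuf:

#include <stdint.h>
#include <stdio.h>

#define CRC_LEN	4

/* Toy stand-in for the last two mbuf segments of a packet. */
struct toy_seg {
	uint16_t data_len;
};

/* Returns 1 if the final segment ends up empty and should be freed. */
static int
strip_crc(struct toy_seg *prev, struct toy_seg *last,
	  uint32_t *pkt_len, uint16_t *nb_segs)
{
	*pkt_len -= CRC_LEN;
	if (last->data_len <= CRC_LEN) {
		/* CRC spills into the previous segment: trim it there too. */
		prev->data_len -= CRC_LEN - last->data_len;
		(*nb_segs)--;
		return 1;
	}
	last->data_len -= CRC_LEN;
	return 0;
}

int main(void)
{
	struct toy_seg prev = { .data_len = 1500 }, last = { .data_len = 2 };
	uint32_t pkt_len = 1500 + 2;	/* includes the 4 CRC bytes */
	uint16_t nb_segs = 2;
	int freed = strip_crc(&prev, &last, &pkt_len, &nb_segs);

	printf("pkt_len=%u prev=%u freed=%d segs=%u\n",
	       (unsigned int)pkt_len, (unsigned int)prev.data_len,
	       freed, (unsigned int)nb_segs);
	return 0;
}
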
*/ + first_seg = NULL; + } + rxq->rx_tail = rx_id; + + /* + * Save receive context. + */ + rxq->pkt_first_seg = first_seg; + rxq->pkt_last_seg = last_seg; + + /* + * If the number of free RX descriptors is greater than the RX free + * threshold of the queue, advance the Receive Descriptor Tail (RDT) + * register. + * Update the RDT with the value of the last processed RX descriptor + * minus 1, to guarantee that the RDT register is never equal to the + * RDH register, which creates a "full" ring situtation from the + * hardware point of view... + */ + nb_hold = nb_hold + rxq->nb_rx_hold; + if (nb_hold > rxq->rx_free_thresh) { + PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u" + " nb_hold=%u nb_rx=%u", rxq->port_id, rxq->queue_id, + rx_id, nb_hold, nb_rx); + rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1); + IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id); + nb_hold = 0; + } + rxq->nb_rx_hold = nb_hold; + return nb_rx; +} + +static void +igc_rx_queue_release_mbufs(struct igc_rx_queue *rxq) +{ + unsigned int i; + + if (rxq->sw_ring != NULL) { + for (i = 0; i < rxq->nb_rx_desc; i++) { + if (rxq->sw_ring[i].mbuf != NULL) { + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + rxq->sw_ring[i].mbuf = NULL; + } + } + } +} + +static void +igc_rx_queue_release(struct igc_rx_queue *rxq) +{ + igc_rx_queue_release_mbufs(rxq); + rte_free(rxq->sw_ring); + rte_free(rxq); +} + +void eth_igc_rx_queue_release(void *rxq) +{ + if (rxq) + igc_rx_queue_release(rxq); +} + +uint32_t eth_igc_rx_queue_count(struct rte_eth_dev *dev, + uint16_t rx_queue_id) +{ + /** + * Check the DD bit of a rx descriptor of each 4 in a group, + * to avoid checking too frequently and downgrading performance + * too much. + */ +#define IGC_RXQ_SCAN_INTERVAL 4 + + volatile union igc_adv_rx_desc *rxdp; + struct igc_rx_queue *rxq; + uint16_t desc = 0; + + rxq = dev->data->rx_queues[rx_queue_id]; + rxdp = &rxq->rx_ring[rxq->rx_tail]; + + while (desc < rxq->nb_rx_desc - rxq->rx_tail) { + if (unlikely(!(rxdp->wb.upper.status_error & + IGC_RXD_STAT_DD))) + return desc; + desc += IGC_RXQ_SCAN_INTERVAL; + rxdp += IGC_RXQ_SCAN_INTERVAL; + } + rxdp = &rxq->rx_ring[rxq->rx_tail + desc - rxq->nb_rx_desc]; + + while (desc < rxq->nb_rx_desc && + (rxdp->wb.upper.status_error & IGC_RXD_STAT_DD)) { + desc += IGC_RXQ_SCAN_INTERVAL; + rxdp += IGC_RXQ_SCAN_INTERVAL; + } + + return desc; +} + +int eth_igc_rx_descriptor_done(void *rx_queue, uint16_t offset) +{ + volatile union igc_adv_rx_desc *rxdp; + struct igc_rx_queue *rxq = rx_queue; + uint32_t desc; + + if (unlikely(offset >= rxq->nb_rx_desc)) + return 0; + + desc = rxq->rx_tail + offset; + if (desc >= rxq->nb_rx_desc) + desc -= rxq->nb_rx_desc; + + rxdp = &rxq->rx_ring[desc]; + return !!(rxdp->wb.upper.status_error & + rte_cpu_to_le_32(IGC_RXD_STAT_DD)); +} + +int eth_igc_rx_descriptor_status(void *rx_queue, uint16_t offset) +{ + struct igc_rx_queue *rxq = rx_queue; + volatile uint32_t *status; + uint32_t desc; + + if (unlikely(offset >= rxq->nb_rx_desc)) + return -EINVAL; + + if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold) + return RTE_ETH_RX_DESC_UNAVAIL; + + desc = rxq->rx_tail + offset; + if (desc >= rxq->nb_rx_desc) + desc -= rxq->nb_rx_desc; + + status = &rxq->rx_ring[desc].wb.upper.status_error; + if (*status & rte_cpu_to_le_32(IGC_RXD_STAT_DD)) + return RTE_ETH_RX_DESC_DONE; + + return RTE_ETH_RX_DESC_AVAIL; +} + +static int +igc_alloc_rx_queue_mbufs(struct igc_rx_queue *rxq) +{ + struct igc_rx_entry *rxe = rxq->sw_ring; + uint64_t dma_addr; + unsigned int i; + + /* Initialize software 
ring entries. */ + for (i = 0; i < rxq->nb_rx_desc; i++) { + volatile union igc_adv_rx_desc *rxd; + struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool); + + if (mbuf == NULL) { + PMD_INIT_LOG(ERR, "RX mbuf alloc failed " + "queue_id=%hu", rxq->queue_id); + return -ENOMEM; + } + dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf)); + rxd = &rxq->rx_ring[i]; + rxd->read.hdr_addr = 0; + rxd->read.pkt_addr = dma_addr; + rxe[i].mbuf = mbuf; + } + + return 0; +} + +/* + * RSS random key supplied in section 7.1.2.9.3 of the Intel I225 datasheet. + * Used as the default key. + */ +static uint8_t rss_intel_key[40] = { + 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, + 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0, + 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4, + 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, + 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA, +}; + +static void +igc_rss_disable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw; + uint32_t mrqc; + + hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + mrqc = IGC_READ_REG(hw, IGC_MRQC); + mrqc &= ~IGC_MRQC_ENABLE_MASK; + IGC_WRITE_REG(hw, IGC_MRQC, mrqc); +} + +void +igc_hw_rss_hash_set(struct igc_hw *hw, struct rte_eth_rss_conf *rss_conf) +{ + uint8_t *hash_key; + uint32_t rss_key; + uint32_t mrqc; + uint64_t rss_hf; + uint16_t i; + + hash_key = rss_conf->rss_key; + if (hash_key != NULL) { + /* Fill in RSS hash key */ + for (i = 0; i < 10; i++) { + rss_key = hash_key[(i * 4)]; + rss_key |= hash_key[(i * 4) + 1] << 8; + rss_key |= hash_key[(i * 4) + 2] << 16; + rss_key |= hash_key[(i * 4) + 3] << 24; + IGC_WRITE_REG(hw, IGC_RSSRK(i), rss_key); + } + } + + /* Set configured hashing protocols in MRQC register */ + rss_hf = rss_conf->rss_hf; + mrqc = IGC_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */ + if (rss_hf & ETH_RSS_IPV4) + mrqc |= IGC_MRQC_RSS_FIELD_IPV4; + if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) + mrqc |= IGC_MRQC_RSS_FIELD_IPV4_TCP; + if (rss_hf & ETH_RSS_IPV6) + mrqc |= IGC_MRQC_RSS_FIELD_IPV6; + if (rss_hf & ETH_RSS_IPV6_EX) + mrqc |= IGC_MRQC_RSS_FIELD_IPV6_EX; + if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP) + mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP; + if (rss_hf & ETH_RSS_IPV6_TCP_EX) + mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP_EX; + if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) + mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP; + if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP) + mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP; + if (rss_hf & ETH_RSS_IPV6_UDP_EX) + mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP_EX; + IGC_WRITE_REG(hw, IGC_MRQC, mrqc); +} + +static void +igc_rss_configure(struct rte_eth_dev *dev) +{ + struct rte_eth_rss_conf rss_conf; + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint16_t i; + + /* Fill in redirection table. */ + for (i = 0; i < 128; i++) { + union igc_reta { + uint32_t dword; + uint8_t bytes[4]; + } reta; + uint8_t q_idx; + + q_idx = (uint8_t)((dev->data->nb_rx_queues > 1) ? + i % dev->data->nb_rx_queues : 0); + reta.bytes[i & 3] = q_idx; + if ((i & 3) == 3) + IGC_WRITE_REG(hw, IGC_RETA(i >> 2), reta.dword); + } + + /* + * Configure the RSS key and the RSS protocols used to compute + * the RSS hash of input packets. 
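+ * If the requested hash functions do not overlap IGC_RSS_OFFLOAD_ALL,
+ * RSS is simply left disabled; when the application supplies no key,
+ * the default rss_intel_key above is programmed.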
+ */ + rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf; + if ((rss_conf.rss_hf & IGC_RSS_OFFLOAD_ALL) == 0) { + igc_rss_disable(dev); + return; + } + + if (rss_conf.rss_key == NULL) + rss_conf.rss_key = rss_intel_key; /* Default hash key */ + igc_hw_rss_hash_set(hw, &rss_conf); +} + +static int +igc_dev_mq_rx_configure(struct rte_eth_dev *dev) +{ + if (RTE_ETH_DEV_SRIOV(dev).active) { + PMD_INIT_LOG(ERR, "SRIOV unsupported!"); + return -EINVAL; + } + + switch (dev->data->dev_conf.rxmode.mq_mode) { + case ETH_MQ_RX_RSS: + igc_rss_configure(dev); + break; + case ETH_MQ_RX_NONE: + igc_rss_disable(dev); + break; + default: + PMD_INIT_LOG(ERR, "rx mode(%d) not supported!", + dev->data->dev_conf.rxmode.mq_mode); + return -EINVAL; + } + return 0; +} + +int +igc_rx_init(struct rte_eth_dev *dev) +{ + struct igc_rx_queue *rxq; + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + const uint64_t offloads = dev->data->dev_conf.rxmode.offloads; + uint32_t max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len; + uint32_t rctl; + uint32_t rxcsum; + uint16_t buf_size; + uint16_t rctl_bsize; + uint16_t i; + int ret; + + dev->rx_pkt_burst = eth_igc_recv_pkts; + + /* + * Make sure receives are disabled while setting + * up the descriptor ring. + */ + rctl = IGC_READ_REG(hw, IGC_RCTL); + IGC_WRITE_REG(hw, IGC_RCTL, rctl & ~IGC_RCTL_EN); + + /* Configure support of jumbo frames, if any. */ + if (offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { + rctl |= IGC_RCTL_LPE; + + /* + * Set maximum packet length by default, and might be updated + * together with enabling/disabling dual VLAN. + */ + IGC_WRITE_REG(hw, IGC_RLPML, + max_rx_pkt_len + VLAN_TAG_SIZE); + } else { + rctl &= ~IGC_RCTL_LPE; + } + + /* Configure and enable each RX queue. */ + rctl_bsize = 0; + for (i = 0; i < dev->data->nb_rx_queues; i++) { + uint64_t bus_addr; + uint32_t rxdctl; + uint32_t srrctl; + + rxq = dev->data->rx_queues[i]; + rxq->flags = 0; + + /* Allocate buffers for descriptor rings and set up queue */ + ret = igc_alloc_rx_queue_mbufs(rxq); + if (ret) + return ret; + + /* + * Reset crc_len in case it was changed after queue setup by a + * call to configure + */ + rxq->crc_len = (offloads & DEV_RX_OFFLOAD_KEEP_CRC) ? + RTE_ETHER_CRC_LEN : 0; + + bus_addr = rxq->rx_ring_phys_addr; + IGC_WRITE_REG(hw, IGC_RDLEN(rxq->reg_idx), + rxq->nb_rx_desc * + sizeof(union igc_adv_rx_desc)); + IGC_WRITE_REG(hw, IGC_RDBAH(rxq->reg_idx), + (uint32_t)(bus_addr >> 32)); + IGC_WRITE_REG(hw, IGC_RDBAL(rxq->reg_idx), + (uint32_t)bus_addr); + + /* set descriptor configuration */ + srrctl = IGC_SRRCTL_DESCTYPE_ADV_ONEBUF; + + srrctl |= (RTE_PKTMBUF_HEADROOM / 64) << + IGC_SRRCTL_BSIZEHEADER_SHIFT; + /* + * Configure RX buffer size. + */ + buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) - + RTE_PKTMBUF_HEADROOM); + if (buf_size >= 1024) { + /* + * Configure the BSIZEPACKET field of the SRRCTL + * register of the queue. + * Value is in 1 KB resolution, from 1 KB to 16 KB. + * If this field is equal to 0b, then RCTL.BSIZE + * determines the RX packet buffer size. + */ + + srrctl |= ((buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT) & + IGC_SRRCTL_BSIZEPKT_MASK); + buf_size = (uint16_t)((srrctl & + IGC_SRRCTL_BSIZEPKT_MASK) << + IGC_SRRCTL_BSIZEPKT_SHIFT); + + /* It adds dual VLAN length for supporting dual VLAN */ + if (max_rx_pkt_len + 2 * VLAN_TAG_SIZE > buf_size) + dev->data->scattered_rx = 1; + } else { + /* + * Use BSIZE field of the device RCTL register. 
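+ * Buffer sizes below 1 KB cannot be expressed in SRRCTL.BSIZEPACKET,
+ * so the smallest such size across the queues is kept in rctl_bsize,
+ * programmed later as 512 or 256 bytes in RCTL, and scattered
+ * receive is forced for those queues.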
+ */ + if (rctl_bsize == 0 || rctl_bsize > buf_size) + rctl_bsize = buf_size; + dev->data->scattered_rx = 1; + } + + /* Set if packets are dropped when no descriptors available */ + if (rxq->drop_en) + srrctl |= IGC_SRRCTL_DROP_EN; + + IGC_WRITE_REG(hw, IGC_SRRCTL(rxq->reg_idx), srrctl); + + /* Enable this RX queue. */ + rxdctl = IGC_RXDCTL_QUEUE_ENABLE; + rxdctl |= ((u32)rxq->pthresh << IGC_RXDCTL_PTHRESH_SHIFT) & + IGC_RXDCTL_PTHRESH_MSK; + rxdctl |= ((u32)rxq->hthresh << IGC_RXDCTL_HTHRESH_SHIFT) & + IGC_RXDCTL_HTHRESH_MSK; + rxdctl |= ((u32)rxq->wthresh << IGC_RXDCTL_WTHRESH_SHIFT) & + IGC_RXDCTL_WTHRESH_MSK; + IGC_WRITE_REG(hw, IGC_RXDCTL(rxq->reg_idx), rxdctl); + } + + if (offloads & DEV_RX_OFFLOAD_SCATTER) + dev->data->scattered_rx = 1; + + if (dev->data->scattered_rx) { + PMD_INIT_LOG(DEBUG, "forcing scatter mode"); + dev->rx_pkt_burst = eth_igc_recv_scattered_pkts; + } + /* + * Setup BSIZE field of RCTL register, if needed. + * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL + * register, since the code above configures the SRRCTL register of + * the RX queue in such a case. + * All configurable sizes are: + * 16384: rctl |= (IGC_RCTL_SZ_16384 | IGC_RCTL_BSEX); + * 8192: rctl |= (IGC_RCTL_SZ_8192 | IGC_RCTL_BSEX); + * 4096: rctl |= (IGC_RCTL_SZ_4096 | IGC_RCTL_BSEX); + * 2048: rctl |= IGC_RCTL_SZ_2048; + * 1024: rctl |= IGC_RCTL_SZ_1024; + * 512: rctl |= IGC_RCTL_SZ_512; + * 256: rctl |= IGC_RCTL_SZ_256; + */ + if (rctl_bsize > 0) { + if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */ + rctl |= IGC_RCTL_SZ_512; + else /* 256 <= buf_size < 512 - use 256 */ + rctl |= IGC_RCTL_SZ_256; + } + + /* + * Configure RSS if device configured with multiple RX queues. + */ + igc_dev_mq_rx_configure(dev); + + /* Update the rctl since igc_dev_mq_rx_configure may change its value */ + rctl |= IGC_READ_REG(hw, IGC_RCTL); + + /* + * Setup the Checksum Register. + * Receive Full-Packet Checksum Offload is mutually exclusive with RSS. + */ + rxcsum = IGC_READ_REG(hw, IGC_RXCSUM); + rxcsum |= IGC_RXCSUM_PCSD; + + /* Enable both L3/L4 rx checksum offload */ + if (offloads & DEV_RX_OFFLOAD_IPV4_CKSUM) + rxcsum |= IGC_RXCSUM_IPOFL; + else + rxcsum &= ~IGC_RXCSUM_IPOFL; + if (offloads & + (DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM)) + rxcsum |= IGC_RXCSUM_TUOFL; + else + rxcsum &= ~IGC_RXCSUM_TUOFL; + if (offloads & DEV_RX_OFFLOAD_SCTP_CKSUM) + rxcsum |= IGC_RXCSUM_CRCOFL; + else + rxcsum &= ~IGC_RXCSUM_CRCOFL; + + IGC_WRITE_REG(hw, IGC_RXCSUM, rxcsum); + + /* Setup the Receive Control Register. */ + if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) { + rctl &= ~IGC_RCTL_SECRC; /* Do not Strip Ethernet CRC. */ + + /* clear STRCRC bit in all queues */ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + uint32_t dvmolr = IGC_READ_REG(hw, + IGC_DVMOLR(rxq->reg_idx)); + dvmolr &= ~IGC_DVMOLR_STRCRC; + IGC_WRITE_REG(hw, IGC_DVMOLR(rxq->reg_idx), dvmolr); + } + } else { + rctl |= IGC_RCTL_SECRC; /* Strip Ethernet CRC. 
*/ + + /* set STRCRC bit in all queues */ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + uint32_t dvmolr = IGC_READ_REG(hw, + IGC_DVMOLR(rxq->reg_idx)); + dvmolr |= IGC_DVMOLR_STRCRC; + IGC_WRITE_REG(hw, IGC_DVMOLR(rxq->reg_idx), dvmolr); + } + } + + rctl &= ~IGC_RCTL_MO_MSK; + rctl &= ~IGC_RCTL_LBM_MSK; + rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_LBM_NO | + IGC_RCTL_DPF | + (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT); + + rctl &= ~(IGC_RCTL_HSEL_MSK | IGC_RCTL_CFIEN | IGC_RCTL_CFI | + IGC_RCTL_PSP | IGC_RCTL_PMCF); + + /* Make sure VLAN Filters are off. */ + rctl &= ~IGC_RCTL_VFE; + /* Don't store bad packets. */ + rctl &= ~IGC_RCTL_SBP; + + /* Enable Receives. */ + IGC_WRITE_REG(hw, IGC_RCTL, rctl); + + /* + * Setup the HW Rx Head and Tail Descriptor Pointers. + * This needs to be done after enable. + */ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + IGC_WRITE_REG(hw, IGC_RDH(rxq->reg_idx), 0); + IGC_WRITE_REG(hw, IGC_RDT(rxq->reg_idx), + rxq->nb_rx_desc - 1); + } + + return 0; +} + +static void +igc_reset_rx_queue(struct igc_rx_queue *rxq) +{ + static const union igc_adv_rx_desc zeroed_desc = { {0} }; + unsigned int i; + + /* Zero out HW ring memory */ + for (i = 0; i < rxq->nb_rx_desc; i++) + rxq->rx_ring[i] = zeroed_desc; + + rxq->rx_tail = 0; + rxq->pkt_first_seg = NULL; + rxq->pkt_last_seg = NULL; +} + +void +eth_igc_vlan_strip_queue_set(struct rte_eth_dev *dev, + uint16_t rx_queue_id, int on) +{ + struct igc_hw *hw = + IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct igc_rx_queue *rxq = dev->data->rx_queues[rx_queue_id]; + uint32_t reg_val; + + if (rx_queue_id >= IGC_QUEUE_PAIRS_NUM) { + PMD_DRV_LOG(ERR, "Queue index(%u) illegal, max is %u", + rx_queue_id, IGC_QUEUE_PAIRS_NUM - 1); + return; + } + + reg_val = IGC_READ_REG(hw, IGC_DVMOLR(rx_queue_id)); + if (on) { + /* If vlan been stripped off, the CRC is meaningless. */ + reg_val |= IGC_DVMOLR_STRVLAN | IGC_DVMOLR_STRCRC; + rxq->offloads |= ETH_VLAN_STRIP_MASK; + } else { + reg_val &= ~(IGC_DVMOLR_STRVLAN | IGC_DVMOLR_HIDVLAN); + if (dev->data->dev_conf.rxmode.offloads & ETH_VLAN_STRIP_MASK) + rxq->offloads &= ~ETH_VLAN_STRIP_MASK; + } + + IGC_WRITE_REG(hw, IGC_DVMOLR(rx_queue_id), reg_val); +} + +int +eth_igc_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) +{ + const struct rte_memzone *rz; + struct igc_rx_queue *rxq; + struct igc_hw *hw; + unsigned int size; + uint64_t offloads; + + offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads; + hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + /* + * Validate number of receive descriptors. + * It must not exceed hardware maximum, and must be multiple + * of IGC_RX_DESCRIPTOR_MULTIPLE. + */ + if (nb_desc % IGC_RX_DESCRIPTOR_MULTIPLE != 0 || + nb_desc > IGC_MAX_RXD || nb_desc < IGC_MIN_RXD) { + PMD_INIT_LOG(ERR, "RX descriptor must be multiple of" + " %u(cur: %u) and between %u and %u!", + IGC_RX_DESCRIPTOR_MULTIPLE, nb_desc, + IGC_MIN_RXD, IGC_MAX_RXD); + return -EINVAL; + } + + /* Free memory prior to re-allocation if needed */ + if (dev->data->rx_queues[queue_idx] != NULL) { + igc_rx_queue_release(dev->data->rx_queues[queue_idx]); + dev->data->rx_queues[queue_idx] = NULL; + } + + /* First allocate the RX queue data structure. 
*/ + rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igc_rx_queue), + RTE_CACHE_LINE_SIZE); + if (rxq == NULL) + return -ENOMEM; + rxq->offloads = offloads; + rxq->mb_pool = mp; + rxq->nb_rx_desc = nb_desc; + rxq->pthresh = rx_conf->rx_thresh.pthresh; + rxq->hthresh = rx_conf->rx_thresh.hthresh; + rxq->wthresh = rx_conf->rx_thresh.wthresh; + rxq->drop_en = rx_conf->rx_drop_en; + rxq->rx_free_thresh = rx_conf->rx_free_thresh; + rxq->queue_id = queue_idx; + rxq->reg_idx = queue_idx; + rxq->port_id = dev->data->port_id; + + /* + * Allocate RX ring hardware descriptors. A memzone large enough to + * handle the maximum ring size is allocated in order to allow for + * resizing in later calls to the queue setup function. + */ + size = sizeof(union igc_adv_rx_desc) * IGC_MAX_RXD; + rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size, + IGC_ALIGN, socket_id); + if (rz == NULL) { + igc_rx_queue_release(rxq); + return -ENOMEM; + } + rxq->rdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDT(rxq->reg_idx)); + rxq->rdh_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDH(rxq->reg_idx)); + rxq->rx_ring_phys_addr = rz->iova; + rxq->rx_ring = (union igc_adv_rx_desc *)rz->addr; + + /* Allocate software ring. */ + rxq->sw_ring = rte_zmalloc("rxq->sw_ring", + sizeof(struct igc_rx_entry) * nb_desc, + RTE_CACHE_LINE_SIZE); + if (rxq->sw_ring == NULL) { + igc_rx_queue_release(rxq); + return -ENOMEM; + } + + PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64, + rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr); + + dev->data->rx_queues[queue_idx] = rxq; + igc_reset_rx_queue(rxq); + + return 0; +} + +/* prepare packets for transmit */ +static uint16_t +eth_igc_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + /* Check some limitations for TSO in hardware */ + if (m->ol_flags & PKT_TX_TCP_SEG) + if (m->tso_segsz > IGC_TSO_MAX_MSS || + m->l2_len + m->l3_len + m->l4_len > + IGC_TSO_MAX_HDRLEN) { + rte_errno = EINVAL; + return i; + } + + if (m->ol_flags & IGC_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = -ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = -ret; + return i; + } + } + + return i; +} + +/* + *There're some limitations in hardware for TCP segmentation offload. We + *should check whether the parameters are valid. + */ +static inline uint64_t +check_tso_para(uint64_t ol_req, union igc_tx_offload ol_para) +{ + if (!(ol_req & PKT_TX_TCP_SEG)) + return ol_req; + if (ol_para.tso_segsz > IGC_TSO_MAX_MSS || ol_para.l2_len + + ol_para.l3_len + ol_para.l4_len > IGC_TSO_MAX_HDRLEN) { + ol_req &= ~PKT_TX_TCP_SEG; + ol_req |= PKT_TX_TCP_CKSUM; + } + return ol_req; +} + +/* + * Check which hardware context can be used. Use the existing match + * or create a new context descriptor. 
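+ * Two contexts are cached per queue; IGC_CTX_NUM is returned when
+ * neither cached slot matches and a fresh context descriptor must be
+ * written ahead of the data descriptors.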
+ */ +static inline uint32_t +what_advctx_update(struct igc_tx_queue *txq, uint64_t flags, + union igc_tx_offload tx_offload) +{ + uint32_t curr = txq->ctx_curr; + + /* If match with the current context */ + if (likely(txq->ctx_cache[curr].flags == flags && + txq->ctx_cache[curr].tx_offload.data == + (txq->ctx_cache[curr].tx_offload_mask.data & + tx_offload.data))) { + return curr; + } + + /* Total two context, if match with the second context */ + curr ^= 1; + if (likely(txq->ctx_cache[curr].flags == flags && + txq->ctx_cache[curr].tx_offload.data == + (txq->ctx_cache[curr].tx_offload_mask.data & + tx_offload.data))) { + txq->ctx_curr = curr; + return curr; + } + + /* Mismatch, create new one */ + return IGC_CTX_NUM; +} + +/* + * This is a separate function, looking for optimization opportunity here + * Rework required to go with the pre-defined values. + */ +static inline void +igc_set_xmit_ctx(struct igc_tx_queue *txq, + volatile struct igc_adv_tx_context_desc *ctx_txd, + uint64_t ol_flags, union igc_tx_offload tx_offload) +{ + uint32_t type_tucmd_mlhl; + uint32_t mss_l4len_idx; + uint32_t ctx_curr; + uint32_t vlan_macip_lens; + union igc_tx_offload tx_offload_mask; + + /* Use the previous context */ + txq->ctx_curr ^= 1; + ctx_curr = txq->ctx_curr; + + tx_offload_mask.data = 0; + type_tucmd_mlhl = 0; + + /* Specify which HW CTX to upload. */ + mss_l4len_idx = (ctx_curr << IGC_ADVTXD_IDX_SHIFT); + + if (ol_flags & PKT_TX_VLAN_PKT) + tx_offload_mask.vlan_tci = 0xffff; + + /* check if TCP segmentation required for this packet */ + if (ol_flags & PKT_TX_TCP_SEG) { + /* implies IP cksum in IPv4 */ + if (ol_flags & PKT_TX_IP_CKSUM) + type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4 | + IGC_ADVTXD_TUCMD_L4T_TCP | + IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT; + else + type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV6 | + IGC_ADVTXD_TUCMD_L4T_TCP | + IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT; + + tx_offload_mask.data |= TX_TSO_CMP_MASK; + mss_l4len_idx |= tx_offload.tso_segsz << IGC_ADVTXD_MSS_SHIFT; + mss_l4len_idx |= tx_offload.l4_len << IGC_ADVTXD_L4LEN_SHIFT; + } else { /* no TSO, check if hardware checksum is needed */ + if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK)) + tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK; + + if (ol_flags & PKT_TX_IP_CKSUM) + type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4; + + switch (ol_flags & PKT_TX_L4_MASK) { + case PKT_TX_TCP_CKSUM: + type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP | + IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT; + mss_l4len_idx |= sizeof(struct rte_tcp_hdr) + << IGC_ADVTXD_L4LEN_SHIFT; + break; + case PKT_TX_UDP_CKSUM: + type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP | + IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT; + mss_l4len_idx |= sizeof(struct rte_udp_hdr) + << IGC_ADVTXD_L4LEN_SHIFT; + break; + case PKT_TX_SCTP_CKSUM: + type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_SCTP | + IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT; + mss_l4len_idx |= sizeof(struct rte_sctp_hdr) + << IGC_ADVTXD_L4LEN_SHIFT; + break; + default: + type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_RSV | + IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT; + break; + } + } + + txq->ctx_cache[ctx_curr].flags = ol_flags; + txq->ctx_cache[ctx_curr].tx_offload.data = + tx_offload_mask.data & tx_offload.data; + txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask; + + ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl); + vlan_macip_lens = (uint32_t)tx_offload.data; + ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens); + ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx); + 
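+ /* Launch time (timed transmit) is not used by this driver; keep the field zeroed. */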
ctx_txd->u.launch_time = 0; +} + +static inline uint32_t +tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags) +{ + uint32_t cmdtype; + static uint32_t vlan_cmd[2] = {0, IGC_ADVTXD_DCMD_VLE}; + static uint32_t tso_cmd[2] = {0, IGC_ADVTXD_DCMD_TSE}; + cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0]; + cmdtype |= tso_cmd[(ol_flags & PKT_TX_TCP_SEG) != 0]; + return cmdtype; +} + +static inline uint32_t +tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags) +{ + static const uint32_t l4_olinfo[2] = {0, IGC_ADVTXD_POPTS_TXSM}; + static const uint32_t l3_olinfo[2] = {0, IGC_ADVTXD_POPTS_IXSM}; + uint32_t tmp; + + tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM]; + tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0]; + tmp |= l4_olinfo[(ol_flags & PKT_TX_TCP_SEG) != 0]; + return tmp; +} + +static uint16_t +eth_igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct igc_tx_queue * const txq = tx_queue; + struct igc_tx_entry * const sw_ring = txq->sw_ring; + struct igc_tx_entry *txe, *txn; + volatile union igc_adv_tx_desc * const txr = txq->tx_ring; + volatile union igc_adv_tx_desc *txd; + struct rte_mbuf *tx_pkt; + struct rte_mbuf *m_seg; + uint64_t buf_dma_addr; + uint32_t olinfo_status; + uint32_t cmd_type_len; + uint32_t pkt_len; + uint16_t slen; + uint64_t ol_flags; + uint16_t tx_end; + uint16_t tx_id; + uint16_t tx_last; + uint16_t nb_tx; + uint64_t tx_ol_req; + uint32_t new_ctx = 0; + union igc_tx_offload tx_offload = {0}; + + tx_id = txq->tx_tail; + txe = &sw_ring[tx_id]; + + for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { + tx_pkt = *tx_pkts++; + pkt_len = tx_pkt->pkt_len; + + RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf); + + /* + * The number of descriptors that must be allocated for a + * packet is the number of segments of that packet, plus 1 + * Context Descriptor for the VLAN Tag Identifier, if any. + * Determine the last TX descriptor to allocate in the TX ring + * for the packet, starting from the current position (tx_id) + * in the ring. + */ + tx_last = (uint16_t)(tx_id + tx_pkt->nb_segs - 1); + + ol_flags = tx_pkt->ol_flags; + tx_ol_req = ol_flags & IGC_TX_OFFLOAD_MASK; + + /* If a Context Descriptor need be built . */ + if (tx_ol_req) { + tx_offload.l2_len = tx_pkt->l2_len; + tx_offload.l3_len = tx_pkt->l3_len; + tx_offload.l4_len = tx_pkt->l4_len; + tx_offload.vlan_tci = tx_pkt->vlan_tci; + tx_offload.tso_segsz = tx_pkt->tso_segsz; + tx_ol_req = check_tso_para(tx_ol_req, tx_offload); + + new_ctx = what_advctx_update(txq, tx_ol_req, + tx_offload); + /* Only allocate context descriptor if required*/ + new_ctx = (new_ctx >= IGC_CTX_NUM); + tx_last = (uint16_t)(tx_last + new_ctx); + } + if (tx_last >= txq->nb_tx_desc) + tx_last = (uint16_t)(tx_last - txq->nb_tx_desc); + + PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u" + " tx_first=%u tx_last=%u", txq->port_id, txq->queue_id, + pkt_len, tx_id, tx_last); + + /* + * Check if there are enough free descriptors in the TX ring + * to transmit the next packet. + * This operation is based on the two following rules: + * + * 1- Only check that the last needed TX descriptor can be + * allocated (by construction, if that descriptor is free, + * all intermediate ones are also free). + * + * For this purpose, the index of the last TX descriptor + * used for a packet (the "last descriptor" of a packet) + * is recorded in the TX entries (the last one included) + * that are associated with all TX descriptors allocated + * for that packet. 
+ * + * 2- Avoid to allocate the last free TX descriptor of the + * ring, in order to never set the TDT register with the + * same value stored in parallel by the NIC in the TDH + * register, which makes the TX engine of the NIC enter + * in a deadlock situation. + * + * By extension, avoid to allocate a free descriptor that + * belongs to the last set of free descriptors allocated + * to the same packet previously transmitted. + */ + + /* + * The "last descriptor" of the previously sent packet, if any, + * which used the last descriptor to allocate. + */ + tx_end = sw_ring[tx_last].last_id; + + /* + * The next descriptor following that "last descriptor" in the + * ring. + */ + tx_end = sw_ring[tx_end].next_id; + + /* + * The "last descriptor" associated with that next descriptor. + */ + tx_end = sw_ring[tx_end].last_id; + + /* + * Check that this descriptor is free. + */ + if (!(txr[tx_end].wb.status & IGC_TXD_STAT_DD)) { + if (nb_tx == 0) + return 0; + goto end_of_tx; + } + + /* + * Set common flags of all TX Data Descriptors. + * + * The following bits must be set in all Data Descriptors: + * - IGC_ADVTXD_DTYP_DATA + * - IGC_ADVTXD_DCMD_DEXT + * + * The following bits must be set in the first Data Descriptor + * and are ignored in the other ones: + * - IGC_ADVTXD_DCMD_IFCS + * - IGC_ADVTXD_MAC_1588 + * - IGC_ADVTXD_DCMD_VLE + * + * The following bits must only be set in the last Data + * Descriptor: + * - IGC_TXD_CMD_EOP + * + * The following bits can be set in any Data Descriptor, but + * are only set in the last Data Descriptor: + * - IGC_TXD_CMD_RS + */ + cmd_type_len = txq->txd_type | + IGC_ADVTXD_DCMD_IFCS | IGC_ADVTXD_DCMD_DEXT; + if (tx_ol_req & PKT_TX_TCP_SEG) + pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len + + tx_pkt->l4_len); + olinfo_status = (pkt_len << IGC_ADVTXD_PAYLEN_SHIFT); + + /* + * Timer 0 should be used to for packet timestamping, + * sample the packet timestamp to reg 0 + */ + if (ol_flags & PKT_TX_IEEE1588_TMST) + cmd_type_len |= IGC_ADVTXD_MAC_TSTAMP; + + if (tx_ol_req) { + /* Setup TX Advanced context descriptor if required */ + if (new_ctx) { + volatile struct igc_adv_tx_context_desc * + ctx_txd = (volatile struct + igc_adv_tx_context_desc *)&txr[tx_id]; + + txn = &sw_ring[txe->next_id]; + RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf); + + if (txe->mbuf != NULL) { + rte_pktmbuf_free_seg(txe->mbuf); + txe->mbuf = NULL; + } + + igc_set_xmit_ctx(txq, ctx_txd, tx_ol_req, + tx_offload); + + txe->last_id = tx_last; + tx_id = txe->next_id; + txe = txn; + } + + /* Setup the TX Advanced Data Descriptor */ + cmd_type_len |= + tx_desc_vlan_flags_to_cmdtype(tx_ol_req); + olinfo_status |= + tx_desc_cksum_flags_to_olinfo(tx_ol_req); + olinfo_status |= (txq->ctx_curr << + IGC_ADVTXD_IDX_SHIFT); + } + + m_seg = tx_pkt; + do { + txn = &sw_ring[txe->next_id]; + RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf); + + txd = &txr[tx_id]; + + if (txe->mbuf != NULL) + rte_pktmbuf_free_seg(txe->mbuf); + txe->mbuf = m_seg; + + /* Set up transmit descriptor */ + slen = (uint16_t)m_seg->data_len; + buf_dma_addr = rte_mbuf_data_iova(m_seg); + txd->read.buffer_addr = + rte_cpu_to_le_64(buf_dma_addr); + txd->read.cmd_type_len = + rte_cpu_to_le_32(cmd_type_len | slen); + txd->read.olinfo_status = + rte_cpu_to_le_32(olinfo_status); + txe->last_id = tx_last; + tx_id = txe->next_id; + txe = txn; + m_seg = m_seg->next; + } while (m_seg != NULL); + + /* + * The last packet data descriptor needs End Of Packet (EOP) + * and Report Status (RS). 
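+ * Requesting RS only on the final descriptor of the chain limits
+ * DD status write-backs to one per transmitted packet.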
+ */ + txd->read.cmd_type_len |= + rte_cpu_to_le_32(IGC_TXD_CMD_EOP | IGC_TXD_CMD_RS); + } +end_of_tx: + rte_wmb(); + + /* + * Set the Transmit Descriptor Tail (TDT). + */ + IGC_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id); + PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u", + txq->port_id, txq->queue_id, tx_id, nb_tx); + txq->tx_tail = tx_id; + + return nb_tx; +} + +int eth_igc_tx_descriptor_status(void *tx_queue, uint16_t offset) +{ + struct igc_tx_queue *txq = tx_queue; + volatile uint32_t *status; + uint32_t desc; + + if (unlikely(offset >= txq->nb_tx_desc)) + return -EINVAL; + + desc = txq->tx_tail + offset; + if (desc >= txq->nb_tx_desc) + desc -= txq->nb_tx_desc; + + status = &txq->tx_ring[desc].wb.status; + if (*status & rte_cpu_to_le_32(IGC_TXD_STAT_DD)) + return RTE_ETH_TX_DESC_DONE; + + return RTE_ETH_TX_DESC_FULL; +} + +static void +igc_tx_queue_release_mbufs(struct igc_tx_queue *txq) +{ + unsigned int i; + + if (txq->sw_ring != NULL) { + for (i = 0; i < txq->nb_tx_desc; i++) { + if (txq->sw_ring[i].mbuf != NULL) { + rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf); + txq->sw_ring[i].mbuf = NULL; + } + } + } +} + +static void +igc_tx_queue_release(struct igc_tx_queue *txq) +{ + igc_tx_queue_release_mbufs(txq); + rte_free(txq->sw_ring); + rte_free(txq); +} + +void eth_igc_tx_queue_release(void *txq) +{ + if (txq) + igc_tx_queue_release(txq); +} + +static void +igc_reset_tx_queue_stat(struct igc_tx_queue *txq) +{ + txq->tx_head = 0; + txq->tx_tail = 0; + txq->ctx_curr = 0; + memset((void *)&txq->ctx_cache, 0, + IGC_CTX_NUM * sizeof(struct igc_advctx_info)); +} + +static void +igc_reset_tx_queue(struct igc_tx_queue *txq) +{ + struct igc_tx_entry *txe = txq->sw_ring; + uint16_t i, prev; + + /* Initialize ring entries */ + prev = (uint16_t)(txq->nb_tx_desc - 1); + for (i = 0; i < txq->nb_tx_desc; i++) { + volatile union igc_adv_tx_desc *txd = &txq->tx_ring[i]; + + txd->wb.status = IGC_TXD_STAT_DD; + txe[i].mbuf = NULL; + txe[i].last_id = i; + txe[prev].next_id = i; + prev = i; + } + + txq->txd_type = IGC_ADVTXD_DTYP_DATA; + igc_reset_tx_queue_stat(txq); +} + +/* + * clear all rx/tx queue + */ +void +igc_dev_clear_queues(struct rte_eth_dev *dev) +{ + uint16_t i; + struct igc_tx_queue *txq; + struct igc_rx_queue *rxq; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + txq = dev->data->tx_queues[i]; + if (txq != NULL) { + igc_tx_queue_release_mbufs(txq); + igc_reset_tx_queue(txq); + } + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + if (rxq != NULL) { + igc_rx_queue_release_mbufs(rxq); + igc_reset_rx_queue(rxq); + } + } +} + +int eth_igc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + uint16_t nb_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + const struct rte_memzone *tz; + struct igc_tx_queue *txq; + struct igc_hw *hw; + uint32_t size; + + if (nb_desc % IGC_TX_DESCRIPTOR_MULTIPLE != 0 || + nb_desc > IGC_MAX_TXD || nb_desc < IGC_MIN_TXD) { + PMD_INIT_LOG(ERR, "TX-descriptor must be a multiple of " + "%u and between %u and %u!, cur: %u", + IGC_TX_DESCRIPTOR_MULTIPLE, + IGC_MAX_TXD, IGC_MIN_TXD, nb_desc); + return -EINVAL; + } + + hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + /* + * The tx_free_thresh and tx_rs_thresh values are not used in the 2.5G + * driver. 
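+ * Transmitted mbufs are freed lazily when their descriptors are
+ * reused in the transmit loop, and RS is requested for the last
+ * descriptor of every packet, so there is nothing for these
+ * thresholds to tune.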
+ */ + if (tx_conf->tx_free_thresh != 0) + PMD_INIT_LOG(INFO, "The tx_free_thresh parameter is not " + "used for the 2.5G driver."); + if (tx_conf->tx_rs_thresh != 0) + PMD_INIT_LOG(INFO, "The tx_rs_thresh parameter is not " + "used for the 2.5G driver."); + if (tx_conf->tx_thresh.wthresh == 0) + PMD_INIT_LOG(INFO, "To improve 2.5G driver performance, " + "consider setting the TX WTHRESH value to 4, 8, or 16."); + + /* Free memory prior to re-allocation if needed */ + if (dev->data->tx_queues[queue_idx] != NULL) { + igc_tx_queue_release(dev->data->tx_queues[queue_idx]); + dev->data->tx_queues[queue_idx] = NULL; + } + + /* First allocate the tx queue data structure */ + txq = rte_zmalloc("ethdev TX queue", sizeof(struct igc_tx_queue), + RTE_CACHE_LINE_SIZE); + if (txq == NULL) + return -ENOMEM; + + /* + * Allocate TX ring hardware descriptors. A memzone large enough to + * handle the maximum ring size is allocated in order to allow for + * resizing in later calls to the queue setup function. + */ + size = sizeof(union igc_adv_tx_desc) * IGC_MAX_TXD; + tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size, + IGC_ALIGN, socket_id); + if (tz == NULL) { + igc_tx_queue_release(txq); + return -ENOMEM; + } + + txq->nb_tx_desc = nb_desc; + txq->pthresh = tx_conf->tx_thresh.pthresh; + txq->hthresh = tx_conf->tx_thresh.hthresh; + txq->wthresh = tx_conf->tx_thresh.wthresh; + + txq->queue_id = queue_idx; + txq->reg_idx = queue_idx; + txq->port_id = dev->data->port_id; + + txq->tdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_TDT(txq->reg_idx)); + txq->tx_ring_phys_addr = tz->iova; + + txq->tx_ring = (union igc_adv_tx_desc *)tz->addr; + /* Allocate software ring */ + txq->sw_ring = rte_zmalloc("txq->sw_ring", + sizeof(struct igc_tx_entry) * nb_desc, + RTE_CACHE_LINE_SIZE); + if (txq->sw_ring == NULL) { + igc_tx_queue_release(txq); + return -ENOMEM; + } + PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64, + txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr); + + igc_reset_tx_queue(txq); + dev->tx_pkt_burst = eth_igc_xmit_pkts; + dev->tx_pkt_prepare = ð_igc_prep_pkts; + dev->data->tx_queues[queue_idx] = txq; + txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads; + + return 0; +} + +int +eth_igc_tx_done_cleanup(void *txqueue, uint32_t free_cnt) +{ + struct igc_tx_queue *txq = txqueue; + struct igc_tx_entry *sw_ring; + volatile union igc_adv_tx_desc *txr; + uint16_t tx_first; /* First segment analyzed. */ + uint16_t tx_id; /* Current segment being processed. */ + uint16_t tx_last; /* Last segment in the current packet. */ + uint16_t tx_next; /* First segment of the next packet. */ + uint32_t count; + + if (txq == NULL) + return -ENODEV; + + count = 0; + sw_ring = txq->sw_ring; + txr = txq->tx_ring; + + /* + * tx_tail is the last sent packet on the sw_ring. Goto the end + * of that packet (the last segment in the packet chain) and + * then the next segment will be the start of the oldest segment + * in the sw_ring. This is the first packet that will be + * attempted to be freed. + */ + + /* Get last segment in most recently added packet. */ + tx_first = sw_ring[txq->tx_tail].last_id; + + /* Get the next segment, which is the oldest segment in ring. */ + tx_first = sw_ring[tx_first].next_id; + + /* Set the current index to the first. */ + tx_id = tx_first; + + /* + * Loop through each packet. For each packet, verify that an + * mbuf exists and that the last segment is free. If so, free + * it and move on. 
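+ * The walk ends when free_cnt packets have been freed, when a packet
+ * whose last descriptor is not yet done (DD clear) is reached, or
+ * when it wraps back to the starting position without finding
+ * another mbuf.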
+ */ + while (1) { + tx_last = sw_ring[tx_id].last_id; + + if (sw_ring[tx_last].mbuf) { + if (!(txr[tx_last].wb.status & + rte_cpu_to_le_32(IGC_TXD_STAT_DD))) + break; + + /* Get the start of the next packet. */ + tx_next = sw_ring[tx_last].next_id; + + /* + * Loop through all segments in a + * packet. + */ + do { + rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf); + sw_ring[tx_id].mbuf = NULL; + sw_ring[tx_id].last_id = tx_id; + + /* Move to next segemnt. */ + tx_id = sw_ring[tx_id].next_id; + } while (tx_id != tx_next); + + /* + * Increment the number of packets + * freed. + */ + count++; + if (unlikely(count == free_cnt)) + break; + } else { + /* + * There are multiple reasons to be here: + * 1) All the packets on the ring have been + * freed - tx_id is equal to tx_first + * and some packets have been freed. + * - Done, exit + * 2) Interfaces has not sent a rings worth of + * packets yet, so the segment after tail is + * still empty. Or a previous call to this + * function freed some of the segments but + * not all so there is a hole in the list. + * Hopefully this is a rare case. + * - Walk the list and find the next mbuf. If + * there isn't one, then done. + */ + if (likely(tx_id == tx_first && count != 0)) + break; + + /* + * Walk the list and find the next mbuf, if any. + */ + do { + /* Move to next segemnt. */ + tx_id = sw_ring[tx_id].next_id; + + if (sw_ring[tx_id].mbuf) + break; + + } while (tx_id != tx_first); + + /* + * Determine why previous loop bailed. If there + * is not an mbuf, done. + */ + if (sw_ring[tx_id].mbuf == NULL) + break; + } + } + + return count; +} + +void +igc_tx_init(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t tctl; + uint32_t txdctl; + uint16_t i; + + /* Setup the Base and Length of the Tx Descriptor Rings. */ + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct igc_tx_queue *txq = dev->data->tx_queues[i]; + uint64_t bus_addr = txq->tx_ring_phys_addr; + + IGC_WRITE_REG(hw, IGC_TDLEN(txq->reg_idx), + txq->nb_tx_desc * + sizeof(union igc_adv_tx_desc)); + IGC_WRITE_REG(hw, IGC_TDBAH(txq->reg_idx), + (uint32_t)(bus_addr >> 32)); + IGC_WRITE_REG(hw, IGC_TDBAL(txq->reg_idx), + (uint32_t)bus_addr); + + /* Setup the HW Tx Head and Tail descriptor pointers. */ + IGC_WRITE_REG(hw, IGC_TDT(txq->reg_idx), 0); + IGC_WRITE_REG(hw, IGC_TDH(txq->reg_idx), 0); + + /* Setup Transmit threshold registers. */ + txdctl = ((u32)txq->pthresh << IGC_TXDCTL_PTHRESH_SHIFT) & + IGC_TXDCTL_PTHRESH_MSK; + txdctl |= ((u32)txq->hthresh << IGC_TXDCTL_HTHRESH_SHIFT) & + IGC_TXDCTL_HTHRESH_MSK; + txdctl |= ((u32)txq->wthresh << IGC_TXDCTL_WTHRESH_SHIFT) & + IGC_TXDCTL_WTHRESH_MSK; + txdctl |= IGC_TXDCTL_QUEUE_ENABLE; + IGC_WRITE_REG(hw, IGC_TXDCTL(txq->reg_idx), txdctl); + } + + igc_config_collision_dist(hw); + + /* Program the Transmit Control Register. */ + tctl = IGC_READ_REG(hw, IGC_TCTL); + tctl &= ~IGC_TCTL_CT; + tctl |= (IGC_TCTL_PSP | IGC_TCTL_RTLC | IGC_TCTL_EN | + (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT)); + + /* This write will effectively turn on the transmit unit. 
*/ + IGC_WRITE_REG(hw, IGC_TCTL, tctl); +} + +void +eth_igc_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, + struct rte_eth_rxq_info *qinfo) +{ + struct igc_rx_queue *rxq; + + rxq = dev->data->rx_queues[queue_id]; + + qinfo->mp = rxq->mb_pool; + qinfo->scattered_rx = dev->data->scattered_rx; + qinfo->nb_desc = rxq->nb_rx_desc; + + qinfo->conf.rx_free_thresh = rxq->rx_free_thresh; + qinfo->conf.rx_drop_en = rxq->drop_en; + qinfo->conf.offloads = rxq->offloads; +} + +void +eth_igc_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, + struct rte_eth_txq_info *qinfo) +{ + struct igc_tx_queue *txq; + + txq = dev->data->tx_queues[queue_id]; + + qinfo->nb_desc = txq->nb_tx_desc; + + qinfo->conf.tx_thresh.pthresh = txq->pthresh; + qinfo->conf.tx_thresh.hthresh = txq->hthresh; + qinfo->conf.tx_thresh.wthresh = txq->wthresh; + qinfo->conf.offloads = txq->offloads; +} + +int +eth_igc_timesync_read_rx_timestamp(struct rte_eth_dev *dev, + struct timespec *timestamp, + uint32_t queue_idx) +{ + struct igc_rx_queue *rxq; + + if (queue_idx >= IGC_QUEUE_PAIRS_NUM) { + PMD_DRV_LOG(ERR, "Error queue(%u), expect it smaller than %u", + queue_idx, IGC_QUEUE_PAIRS_NUM); + return -EINVAL; + } + + rxq = dev->data->tx_queues[queue_idx]; + *timestamp = rxq->timestamp[0]; + return 0; +} + +/* + * Place timestamp at the beginning of a receive buffer. + */ +int +igc_enable_rx_queue_timestamp(struct rte_eth_dev *dev, uint16_t queue_idx) +{ + struct igc_hw *hw; + struct igc_rx_queue *rxq; + uint32_t srrctl; + + if (queue_idx >= IGC_QUEUE_PAIRS_NUM) { + PMD_DRV_LOG(ERR, "Error queue(%u), expect it smaller than %u", + queue_idx, IGC_QUEUE_PAIRS_NUM); + return -EINVAL; + } + + hw = IGC_DEV_PRIVATE_TO_HW(dev->data->dev_private); + rxq = dev->data->rx_queues[queue_idx]; + srrctl = IGC_READ_REG(hw, IGC_SRRCTL(rxq->reg_idx)); + + /* select timer 0 to report timestamp. 
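+ * Clearing both time-select fields chooses timer 0, and TIMESTAMP_EN
+ * causes the captured time to be written at the start of the receive
+ * buffer, as described above.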
*/ + srrctl &= ~(IGC_SRRCTL_TIME1_MSK | IGC_SRRCTL_TIME0_MSK); + srrctl |= IGC_SRRCTL_TIMESTAMP_EN; + IGC_WRITE_REG(hw, IGC_SRRCTL(rxq->reg_idx), srrctl); + return 0; +} + + diff --git a/drivers/net/igc/igc_txrx.h b/drivers/net/igc/igc_txrx.h new file mode 100644 index 0000000..240f276 --- /dev/null +++ b/drivers/net/igc/igc_txrx.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2020 Intel Corporation + */ + +#ifndef _IGC_TXRX_H_ +#define _IGC_TXRX_H_ + +#include "igc_ethdev.h" + +/* + * RX/TX function prototypes + */ +void eth_igc_tx_queue_release(void *txq); +void eth_igc_rx_queue_release(void *rxq); +void igc_dev_clear_queues(struct rte_eth_dev *dev); +void eth_igc_vlan_strip_queue_set(struct rte_eth_dev *dev, + uint16_t rx_queue_id, int on); +int eth_igc_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); + +uint32_t eth_igc_rx_queue_count(struct rte_eth_dev *dev, + uint16_t rx_queue_id); + +int eth_igc_rx_descriptor_done(void *rx_queue, uint16_t offset); + +int eth_igc_rx_descriptor_status(void *rx_queue, uint16_t offset); + +int eth_igc_tx_descriptor_status(void *tx_queue, uint16_t offset); + +int eth_igc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + uint16_t nb_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); + +int eth_igc_tx_done_cleanup(void *txqueue, uint32_t free_cnt); + +int igc_rx_init(struct rte_eth_dev *dev); +void igc_tx_init(struct rte_eth_dev *dev); + +void +igc_hw_rss_hash_set(struct igc_hw *hw, struct rte_eth_rss_conf *rss_conf); +void eth_igc_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, + struct rte_eth_rxq_info *qinfo); +void eth_igc_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, + struct rte_eth_txq_info *qinfo); + +int +eth_igc_timesync_read_rx_timestamp(struct rte_eth_dev *dev, + struct timespec *timestamp, + uint32_t queue_idx); + +int igc_enable_rx_queue_timestamp(struct rte_eth_dev *dev, uint16_t queue_idx); + + +#endif -- 1.8.3.1
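For reference, a minimal sketch (not part of this patch) of how an application might exercise the queue setup and RSS paths added here through the standard ethdev API. The port id, queue count, descriptor counts and mempool are illustrative assumptions; the descriptor counts are assumed to satisfy the multiple/min/max checks in eth_igc_rx_queue_setup()/eth_igc_tx_queue_setup(), and mb_pool is assumed to have been created with rte_pktmbuf_pool_create().

#include <rte_ethdev.h>
#include <rte_mempool.h>

/* Illustrative sizes, assumed to be within the PMD's descriptor limits. */
#define NB_RXD		512
#define NB_TXD		512
#define NB_QUEUES	2

static int
igc_port_init(uint16_t port_id, struct rte_mempool *mb_pool)
{
	struct rte_eth_conf conf = {
		.rxmode = { .mq_mode = ETH_MQ_RX_RSS },
		.rx_adv_conf = {
			.rss_conf = {
				/* NULL key: the PMD programs its default RSS key. */
				.rss_key = NULL,
				.rss_hf = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP,
			},
		},
	};
	uint16_t q;
	int ret;

	ret = rte_eth_dev_configure(port_id, NB_QUEUES, NB_QUEUES, &conf);
	if (ret != 0)
		return ret;

	for (q = 0; q < NB_QUEUES; q++) {
		/* NULL rxconf/txconf lets the PMD pick its default thresholds. */
		ret = rte_eth_rx_queue_setup(port_id, q, NB_RXD,
				rte_eth_dev_socket_id(port_id), NULL, mb_pool);
		if (ret < 0)
			return ret;
		ret = rte_eth_tx_queue_setup(port_id, q, NB_TXD,
				rte_eth_dev_socket_id(port_id), NULL);
		if (ret < 0)
			return ret;
	}

	/* The PMD's start path is expected to call igc_rx_init()/igc_tx_init(). */
	return rte_eth_dev_start(port_id);
}

With this configuration the ETH_MQ_RX_RSS branch of igc_dev_mq_rx_configure() programs the redirection table and, because rss_key is NULL, the default rss_intel_key.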