* [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only @ 2018-04-18 15:30 Pavan Nikhilesh 2018-04-18 15:30 ` [dpdk-dev] [PATCH 2/2] drivers: mark logtype variables as read mostly Pavan Nikhilesh 2018-04-18 17:43 ` [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only Ferruh Yigit 0 siblings, 2 replies; 10+ messages in thread From: Pavan Nikhilesh @ 2018-04-18 15:30 UTC (permalink / raw) To: thomas, jerin.jacob, techboard; +Cc: dev, Pavan Nikhilesh Add macro to mark a variable to be mostly read only and place it in a separate section. Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> --- Group together mostly read only data to avoid cacheline bouncing, also useful for auditing purposes. lib/librte_eal/common/include/rte_common.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index 6c5bc5a76..f2ff2e9e6 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -114,6 +114,11 @@ static void __attribute__((constructor(prio), used)) func(void) */ #define __rte_noinline __attribute__((noinline)) +/** + * Mark a variable to be mostly read only and place it in a separate section. + */ +#define __rte_read_mostly __attribute__((__section__(".read_mostly"))) + /*********** Macros for pointer arithmetic ********/ /** -- 2.17.0 ^ permalink raw reply [flat|nested] 10+ messages in thread
* [dpdk-dev] [PATCH 2/2] drivers: mark logtype variables as read mostly 2018-04-18 15:30 [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only Pavan Nikhilesh @ 2018-04-18 15:30 ` Pavan Nikhilesh 2018-04-18 17:43 ` [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only Ferruh Yigit 1 sibling, 0 replies; 10+ messages in thread From: Pavan Nikhilesh @ 2018-04-18 15:30 UTC (permalink / raw) To: thomas, jerin.jacob, techboard; +Cc: dev, Pavan Nikhilesh Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> --- drivers/bus/dpaa/dpaa_bus.c | 8 ++++---- drivers/bus/fslmc/fslmc_bus.c | 2 +- drivers/bus/vdev/vdev.c | 2 +- drivers/common/octeontx/octeontx_mbox.c | 2 +- drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c | 2 +- drivers/event/dpaa2/dpaa2_eventdev.c | 2 +- drivers/event/octeontx/ssovf_evdev.c | 2 +- drivers/event/octeontx/timvf_evdev.c | 2 +- drivers/event/opdl/opdl_ring.c | 2 +- drivers/event/sw/sw_evdev.c | 2 +- drivers/mempool/dpaa2/dpaa2_hw_mempool.c | 2 +- drivers/mempool/octeontx/octeontx_fpavf.c | 4 ++-- drivers/net/avf/avf_ethdev.c | 4 ++-- drivers/net/avp/avp_ethdev.c | 2 +- drivers/net/axgbe/axgbe_ethdev.c | 4 ++-- drivers/net/bnx2x/bnx2x_ethdev.c | 4 ++-- drivers/net/bnxt/bnxt_ethdev.c | 2 +- drivers/net/dpaa2/dpaa2_ethdev.c | 2 +- drivers/net/e1000/em_ethdev.c | 4 ++-- drivers/net/ena/ena_ethdev.c | 4 ++-- drivers/net/enic/enic_ethdev.c | 4 ++-- drivers/net/fm10k/fm10k_ethdev.c | 4 ++-- drivers/net/i40e/i40e_ethdev.c | 4 ++-- drivers/net/ixgbe/ixgbe_ethdev.c | 4 ++-- drivers/net/liquidio/lio_ethdev.c | 4 ++-- drivers/net/mlx5/mlx5.c | 2 +- drivers/net/nfp/nfp_net.c | 4 ++-- drivers/net/octeontx/octeontx_ethdev.c | 6 +++--- drivers/net/qede/qede_ethdev.c | 4 ++-- drivers/net/sfc/sfc_ethdev.c | 2 +- drivers/net/szedata2/rte_eth_szedata2.c | 4 ++-- drivers/net/thunderx/nicvf_ethdev.c | 6 +++--- drivers/net/virtio/virtio_ethdev.c | 4 ++-- drivers/net/vmxnet3/vmxnet3_ethdev.c | 4 ++-- drivers/raw/skeleton_rawdev/skeleton_rawdev.c | 2 +- lib/librte_member/rte_member.c | 2 +- lib/librte_rawdev/rte_rawdev.c | 2 +- 37 files changed, 60 insertions(+), 60 deletions(-) diff --git a/drivers/bus/dpaa/dpaa_bus.c b/drivers/bus/dpaa/dpaa_bus.c index ffc90a702..3758960d9 100644 --- a/drivers/bus/dpaa/dpaa_bus.c +++ b/drivers/bus/dpaa/dpaa_bus.c @@ -41,10 +41,10 @@ #include <of.h> #include <netcfg.h> -int dpaa_logtype_bus; -int dpaa_logtype_mempool; -int dpaa_logtype_pmd; -int dpaa_logtype_eventdev; +int dpaa_logtype_bus __rte_read_mostly; +int dpaa_logtype_mempool __rte_read_mostly; +int dpaa_logtype_pmd __rte_read_mostly; +int dpaa_logtype_eventdev __rte_read_mostly; struct rte_dpaa_bus rte_dpaa_bus; struct netcfg_info *dpaa_netcfg; diff --git a/drivers/bus/fslmc/fslmc_bus.c b/drivers/bus/fslmc/fslmc_bus.c index d0b32611f..2a9f23725 100644 --- a/drivers/bus/fslmc/fslmc_bus.c +++ b/drivers/bus/fslmc/fslmc_bus.c @@ -20,7 +20,7 @@ #include <fslmc_vfio.h> #include "fslmc_logs.h" -int dpaa2_logtype_bus; +int dpaa2_logtype_bus __rte_read_mostly; #define VFIO_IOMMU_GROUP_PATH "/sys/kernel/iommu_groups" diff --git a/drivers/bus/vdev/vdev.c b/drivers/bus/vdev/vdev.c index f8dd1f5e6..b062b8766 100644 --- a/drivers/bus/vdev/vdev.c +++ b/drivers/bus/vdev/vdev.c @@ -23,7 +23,7 @@ #include "rte_bus_vdev.h" #include "vdev_logs.h" -int vdev_logtype_bus; +int vdev_logtype_bus __rte_read_mostly; /* Forward declare to access virtual bus name */ static struct rte_bus rte_vdev_bus; diff --git a/drivers/common/octeontx/octeontx_mbox.c b/drivers/common/octeontx/octeontx_mbox.c index 93e6e8579..e3468959e 100644 --- a/drivers/common/octeontx/octeontx_mbox.c +++ b/drivers/common/octeontx/octeontx_mbox.c @@ -59,7 +59,7 @@ struct mbox_ram_hdr { }; }; -int octeontx_logtype_mbox; +int octeontx_logtype_mbox __rte_read_mostly; RTE_INIT(otx_init_log); static void diff --git a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c index 23012e35a..77fe45724 100644 --- a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c +++ b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c @@ -56,7 +56,7 @@ enum rta_sec_era rta_sec_era = RTA_SEC_ERA_8; static uint8_t cryptodev_driver_id; -int dpaa2_logtype_sec; +int dpaa2_logtype_sec __rte_read_mostly; static inline int build_proto_fd(dpaa2_sec_session *sess, diff --git a/drivers/event/dpaa2/dpaa2_eventdev.c b/drivers/event/dpaa2/dpaa2_eventdev.c index f50bb8dc6..84652836a 100644 --- a/drivers/event/dpaa2/dpaa2_eventdev.c +++ b/drivers/event/dpaa2/dpaa2_eventdev.c @@ -48,7 +48,7 @@ */ /* Dynamic logging identified for mempool */ -int dpaa2_logtype_event; +int dpaa2_logtype_event __rte_read_mostly; static uint16_t dpaa2_eventdev_enqueue_burst(void *port, const struct rte_event ev[], diff --git a/drivers/event/octeontx/ssovf_evdev.c b/drivers/event/octeontx/ssovf_evdev.c index 2df70b52a..7a261e174 100644 --- a/drivers/event/octeontx/ssovf_evdev.c +++ b/drivers/event/octeontx/ssovf_evdev.c @@ -20,7 +20,7 @@ #include "ssovf_evdev.h" #include "timvf_evdev.h" -int otx_logtype_ssovf; +int otx_logtype_ssovf __rte_read_mostly; static uint8_t timvf_enable_stats; RTE_INIT(otx_ssovf_init_log); diff --git a/drivers/event/octeontx/timvf_evdev.c b/drivers/event/octeontx/timvf_evdev.c index b20a2f1f5..a4c69ddb5 100644 --- a/drivers/event/octeontx/timvf_evdev.c +++ b/drivers/event/octeontx/timvf_evdev.c @@ -5,7 +5,7 @@ #include "timvf_evdev.h" -int otx_logtype_timvf; +int otx_logtype_timvf __rte_read_mostly; RTE_INIT(otx_timvf_init_log); static void diff --git a/drivers/event/opdl/opdl_ring.c b/drivers/event/opdl/opdl_ring.c index 8aca481c9..1d2e8f633 100644 --- a/drivers/event/opdl/opdl_ring.c +++ b/drivers/event/opdl/opdl_ring.c @@ -30,7 +30,7 @@ #define OPDL_OPA_MASK (0xFF) #define OPDL_OPA_OFFSET (0x38) -int opdl_logtype_driver; +int opdl_logtype_driver __rte_read_mostly; /* Types of dependency between stages */ enum dep_type { diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index dcb655108..4733e6600 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -949,7 +949,7 @@ RTE_PMD_REGISTER_PARAM_STRING(event_sw, NUMA_NODE_ARG "=<int> " SCHED_QUANTA_ARG "=<int>" CREDIT_QUANTA_ARG "=<int>"); /* declared extern in header, for access from other .c files */ -int eventdev_sw_log_level; +int eventdev_sw_log_level __rte_read_mostly; RTE_INIT(evdev_sw_init_log); static void diff --git a/drivers/mempool/dpaa2/dpaa2_hw_mempool.c b/drivers/mempool/dpaa2/dpaa2_hw_mempool.c index ce7a4c577..3659d9eb7 100644 --- a/drivers/mempool/dpaa2/dpaa2_hw_mempool.c +++ b/drivers/mempool/dpaa2/dpaa2_hw_mempool.c @@ -33,7 +33,7 @@ struct dpaa2_bp_info rte_dpaa2_bpid_info[MAX_BPID]; static struct dpaa2_bp_list *h_bp_list; /* Dynamic logging identified for mempool */ -int dpaa2_logtype_mempool; +int dpaa2_logtype_mempool __rte_read_mostly; static int rte_hw_mbuf_create_pool(struct rte_mempool *mp) diff --git a/drivers/mempool/octeontx/octeontx_fpavf.c b/drivers/mempool/octeontx/octeontx_fpavf.c index 7aecaa85d..48df4c73e 100644 --- a/drivers/mempool/octeontx/octeontx_fpavf.c +++ b/drivers/mempool/octeontx/octeontx_fpavf.c @@ -105,8 +105,8 @@ struct octeontx_fpadev { static struct octeontx_fpadev fpadev; -int octeontx_logtype_fpavf; -int octeontx_logtype_fpavf_mbox; +int octeontx_logtype_fpavf __rte_read_mostly; +int octeontx_logtype_fpavf_mbox __rte_read_mostly; RTE_INIT(otx_pool_init_log); static void diff --git a/drivers/net/avf/avf_ethdev.c b/drivers/net/avf/avf_ethdev.c index a1ae3a23a..f2e3f62f0 100644 --- a/drivers/net/avf/avf_ethdev.c +++ b/drivers/net/avf/avf_ethdev.c @@ -72,8 +72,8 @@ static int avf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, static int avf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id); -int avf_logtype_init; -int avf_logtype_driver; +int avf_logtype_init __rte_read_mostly; +int avf_logtype_driver __rte_read_mostly; static const struct rte_pci_id pci_id_avf_map[] = { { RTE_PCI_DEVICE(AVF_INTEL_VENDOR_ID, AVF_DEV_ID_ADAPTIVE_VF) }, diff --git a/drivers/net/avp/avp_ethdev.c b/drivers/net/avp/avp_ethdev.c index 5b3c4cebf..4a039da37 100644 --- a/drivers/net/avp/avp_ethdev.c +++ b/drivers/net/avp/avp_ethdev.c @@ -32,7 +32,7 @@ #include "avp_logs.h" -int avp_logtype_driver; +int avp_logtype_driver __rte_read_mostly; static int avp_dev_create(struct rte_pci_device *pci_dev, struct rte_eth_dev *eth_dev); diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c index 7a3ba2e7b..0707127b1 100644 --- a/drivers/net/axgbe/axgbe_ethdev.c +++ b/drivers/net/axgbe/axgbe_ethdev.c @@ -32,8 +32,8 @@ static void axgbe_dev_info_get(struct rte_eth_dev *dev, #define AMD_PCI_AXGBE_DEVICE_V2A 0x1458 #define AMD_PCI_AXGBE_DEVICE_V2B 0x1459 -int axgbe_logtype_init; -int axgbe_logtype_driver; +int axgbe_logtype_init __rte_read_mostly; +int axgbe_logtype_driver __rte_read_mostly; static const struct rte_pci_id pci_id_axgbe_map[] = { {RTE_PCI_DEVICE(AMD_PCI_VENDOR_ID, AMD_PCI_AXGBE_DEVICE_V2A)}, diff --git a/drivers/net/bnx2x/bnx2x_ethdev.c b/drivers/net/bnx2x/bnx2x_ethdev.c index e08ef779f..207edb4b9 100644 --- a/drivers/net/bnx2x/bnx2x_ethdev.c +++ b/drivers/net/bnx2x/bnx2x_ethdev.c @@ -14,8 +14,8 @@ #include <rte_dev.h> #include <rte_ethdev_pci.h> -int bnx2x_logtype_init; -int bnx2x_logtype_driver; +int bnx2x_logtype_init __rte_read_mostly; +int bnx2x_logtype_driver __rte_read_mostly; /* * The set of PCI devices this driver supports diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c index 1d4ff54b7..a81646c7c 100644 --- a/drivers/net/bnxt/bnxt_ethdev.c +++ b/drivers/net/bnxt/bnxt_ethdev.c @@ -30,7 +30,7 @@ #define DRV_MODULE_NAME "bnxt" static const char bnxt_version[] = "Broadcom Cumulus driver " DRV_MODULE_NAME "\n"; -int bnxt_logtype_driver; +int bnxt_logtype_driver __rte_read_mostly; #define PCI_VENDOR_ID_BROADCOM 0x14E4 diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c index 54ab9eb15..fcb8c9d55 100644 --- a/drivers/net/dpaa2/dpaa2_ethdev.c +++ b/drivers/net/dpaa2/dpaa2_ethdev.c @@ -57,7 +57,7 @@ static int dpaa2_dev_set_link_up(struct rte_eth_dev *dev); static int dpaa2_dev_set_link_down(struct rte_eth_dev *dev); static int dpaa2_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); -int dpaa2_logtype_pmd; +int dpaa2_logtype_pmd __rte_read_mostly; static int dpaa2_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index de7db2650..3f301764a 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -106,8 +106,8 @@ static int eth_em_set_mc_addr_list(struct rte_eth_dev *dev, static enum e1000_fc_mode em_fc_setting = e1000_fc_full; -int e1000_logtype_init; -int e1000_logtype_driver; +int e1000_logtype_init __rte_read_mostly; +int e1000_logtype_driver __rte_read_mostly; /* * The set of PCI devices this driver supports diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c index ab4e2af91..ec40690e4 100644 --- a/drivers/net/ena/ena_ethdev.c +++ b/drivers/net/ena/ena_ethdev.c @@ -186,8 +186,8 @@ static const struct ena_stats ena_stats_ena_com_strings[] = { #define ENA_TX_OFFLOAD_NOTSUP_MASK \ (PKT_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK) -int ena_logtype_init; -int ena_logtype_driver; +int ena_logtype_init __rte_read_mostly; +int ena_logtype_driver __rte_read_mostly; static const struct rte_pci_id pci_id_ena_map[] = { { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) }, diff --git a/drivers/net/enic/enic_ethdev.c b/drivers/net/enic/enic_ethdev.c index 801f4704c..cd0bcbb8b 100644 --- a/drivers/net/enic/enic_ethdev.c +++ b/drivers/net/enic/enic_ethdev.c @@ -20,8 +20,8 @@ #include "vnic_enet.h" #include "enic.h" -int enicpmd_logtype_init; -int enicpmd_logtype_flow; +int enicpmd_logtype_init __rte_read_mostly; +int enicpmd_logtype_flow __rte_read_mostly; #define PMD_INIT_LOG(level, fmt, args...) \ rte_log(RTE_LOG_ ## level, enicpmd_logtype_init, \ diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index 34affd1cc..d288ab916 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -40,8 +40,8 @@ #define GLORT_FD_MASK GLORT_PF_MASK #define GLORT_FD_INDEX GLORT_FD_Q_BASE -int fm10k_logtype_init; -int fm10k_logtype_driver; +int fm10k_logtype_init __rte_read_mostly; +int fm10k_logtype_driver __rte_read_mostly; static void fm10k_close_mbx_service(struct fm10k_hw *hw); static void fm10k_dev_promiscuous_enable(struct rte_eth_dev *dev); diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 180ac7449..453a028b4 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -392,8 +392,8 @@ static void i40e_tunnel_filter_restore(struct i40e_pf *pf); static void i40e_filter_restore(struct i40e_pf *pf); static void i40e_notify_all_vfs_link_status(struct rte_eth_dev *dev); -int i40e_logtype_init; -int i40e_logtype_driver; +int i40e_logtype_init __rte_read_mostly; +int i40e_logtype_driver __rte_read_mostly; static const struct rte_pci_id pci_id_i40e_map[] = { { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_XL710) }, diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index a5e2fc0ca..0626ac0df 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -400,8 +400,8 @@ static void ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev); (r) = (h)->bitmap[idx] >> bit & 1;\ } while (0) -int ixgbe_logtype_init; -int ixgbe_logtype_driver; +int ixgbe_logtype_init __rte_read_mostly; +int ixgbe_logtype_driver __rte_read_mostly; /* * The set of PCI devices this driver supports diff --git a/drivers/net/liquidio/lio_ethdev.c b/drivers/net/liquidio/lio_ethdev.c index a13a566f9..1ece118ba 100644 --- a/drivers/net/liquidio/lio_ethdev.c +++ b/drivers/net/liquidio/lio_ethdev.c @@ -14,8 +14,8 @@ #include "lio_ethdev.h" #include "lio_rxtx.h" -int lio_logtype_init; -int lio_logtype_driver; +int lio_logtype_init __rte_read_mostly; +int lio_logtype_driver __rte_read_mostly; /* Default RSS key in use */ static uint8_t lio_rss_key[40] = { diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 68783c3ac..e35a6e1e0 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -82,7 +82,7 @@ #endif /** Driver-specific log messages type. */ -int mlx5_logtype; +int mlx5_logtype __rte_read_mostly; /** * Retrieve integer value from environment variable. diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c index bedd4b668..df2ccab4a 100644 --- a/drivers/net/nfp/nfp_net.c +++ b/drivers/net/nfp/nfp_net.c @@ -3307,8 +3307,8 @@ static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, return ret; } -int nfp_logtype_init; -int nfp_logtype_driver; +int nfp_logtype_init __rte_read_mostly; +int nfp_logtype_driver __rte_read_mostly; static const struct rte_pci_id pci_id_nfp_pf_net_map[] = { { diff --git a/drivers/net/octeontx/octeontx_ethdev.c b/drivers/net/octeontx/octeontx_ethdev.c index 6d67d257c..c5d4805f2 100644 --- a/drivers/net/octeontx/octeontx_ethdev.c +++ b/drivers/net/octeontx/octeontx_ethdev.c @@ -42,9 +42,9 @@ enum octeontx_link_speed { OCTEONTX_LINK_SPEED_RESERVE2 }; -int otx_net_logtype_mbox; -int otx_net_logtype_init; -int otx_net_logtype_driver; +int otx_net_logtype_mbox __rte_read_mostly; +int otx_net_logtype_init __rte_read_mostly; +int otx_net_logtype_driver __rte_read_mostly; RTE_INIT(otx_net_init_log); static void diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c index 12023002e..4959438b2 100644 --- a/drivers/net/qede/qede_ethdev.c +++ b/drivers/net/qede/qede_ethdev.c @@ -12,8 +12,8 @@ #include <rte_kvargs.h> /* Globals */ -int qede_logtype_init; -int qede_logtype_driver; +int qede_logtype_init __rte_read_mostly; +int qede_logtype_driver __rte_read_mostly; static const struct qed_eth_ops *qed_ops; static int64_t timer_period = 1; diff --git a/drivers/net/sfc/sfc_ethdev.c b/drivers/net/sfc/sfc_ethdev.c index 47d7a8609..a13a78e96 100644 --- a/drivers/net/sfc/sfc_ethdev.c +++ b/drivers/net/sfc/sfc_ethdev.c @@ -27,7 +27,7 @@ #include "sfc_dp.h" #include "sfc_dp_rx.h" -uint32_t sfc_logtype_driver; +uint32_t sfc_logtype_driver __rte_read_mostly; static struct sfc_dp_list sfc_dp_head = TAILQ_HEAD_INITIALIZER(sfc_dp_head); diff --git a/drivers/net/szedata2/rte_eth_szedata2.c b/drivers/net/szedata2/rte_eth_szedata2.c index d105e50f3..63b352159 100644 --- a/drivers/net/szedata2/rte_eth_szedata2.c +++ b/drivers/net/szedata2/rte_eth_szedata2.c @@ -102,8 +102,8 @@ struct szedata2_tx_queue { volatile uint64_t err_pkts; }; -int szedata2_logtype_init; -int szedata2_logtype_driver; +int szedata2_logtype_init __rte_read_mostly; +int szedata2_logtype_driver __rte_read_mostly; static struct ether_addr eth_addr = { .addr_bytes = { 0x00, 0x11, 0x17, 0x00, 0x00, 0x00 } diff --git a/drivers/net/thunderx/nicvf_ethdev.c b/drivers/net/thunderx/nicvf_ethdev.c index 75e9d16c5..9fcbc1cd7 100644 --- a/drivers/net/thunderx/nicvf_ethdev.c +++ b/drivers/net/thunderx/nicvf_ethdev.c @@ -42,9 +42,9 @@ #include "nicvf_svf.h" #include "nicvf_logs.h" -int nicvf_logtype_mbox; -int nicvf_logtype_init; -int nicvf_logtype_driver; +int nicvf_logtype_mbox __rte_read_mostly; +int nicvf_logtype_init __rte_read_mostly; +int nicvf_logtype_driver __rte_read_mostly; static void nicvf_dev_stop(struct rte_eth_dev *dev); static void nicvf_dev_stop_cleanup(struct rte_eth_dev *dev, bool cleanup); diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 41042cb23..a4d416db1 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -79,8 +79,8 @@ static int virtio_dev_queue_stats_mapping_set( uint8_t stat_idx, uint8_t is_rx); -int virtio_logtype_init; -int virtio_logtype_driver; +int virtio_logtype_init __rte_read_mostly; +int virtio_logtype_driver __rte_read_mostly; static void virtio_notify_peers(struct rte_eth_dev *dev); static void virtio_ack_link_announce(struct rte_eth_dev *dev); diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c index 456852108..f5cc103d3 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c @@ -76,8 +76,8 @@ static int vmxnet3_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr); static void vmxnet3_interrupt_handler(void *param); -int vmxnet3_logtype_init; -int vmxnet3_logtype_driver; +int vmxnet3_logtype_init __rte_read_mostly; +int vmxnet3_logtype_driver __rte_read_mostly; /* * The set of PCI devices this driver supports diff --git a/drivers/raw/skeleton_rawdev/skeleton_rawdev.c b/drivers/raw/skeleton_rawdev/skeleton_rawdev.c index 6bdbbb50d..0d2a7fa9d 100644 --- a/drivers/raw/skeleton_rawdev/skeleton_rawdev.c +++ b/drivers/raw/skeleton_rawdev/skeleton_rawdev.c @@ -29,7 +29,7 @@ #include "skeleton_rawdev.h" /* Dynamic log type identifier */ -int skeleton_pmd_logtype; +int skeleton_pmd_logtype __rte_read_mostly; /* Count of instances */ uint16_t skeldev_init_once; diff --git a/lib/librte_member/rte_member.c b/lib/librte_member/rte_member.c index e147dd1f1..2d0e8aeb5 100644 --- a/lib/librte_member/rte_member.c +++ b/lib/librte_member/rte_member.c @@ -14,7 +14,7 @@ #include "rte_member_ht.h" #include "rte_member_vbf.h" -int librte_member_logtype; +int librte_member_logtype __rte_read_mostly; TAILQ_HEAD(rte_member_list, rte_tailq_entry); static struct rte_tailq_elem rte_member_tailq = { diff --git a/lib/librte_rawdev/rte_rawdev.c b/lib/librte_rawdev/rte_rawdev.c index d314ef96b..38445e854 100644 --- a/lib/librte_rawdev/rte_rawdev.c +++ b/lib/librte_rawdev/rte_rawdev.c @@ -33,7 +33,7 @@ #include "rte_rawdev_pmd.h" /* dynamic log identifier */ -int librawdev_logtype; +int librawdev_logtype __rte_read_mostly; struct rte_rawdev rte_rawdevices[RTE_RAWDEV_MAX_DEVS]; -- 2.17.0 ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only 2018-04-18 15:30 [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only Pavan Nikhilesh 2018-04-18 15:30 ` [dpdk-dev] [PATCH 2/2] drivers: mark logtype variables as read mostly Pavan Nikhilesh @ 2018-04-18 17:43 ` Ferruh Yigit 2018-04-18 17:55 ` Pavan Nikhilesh 1 sibling, 1 reply; 10+ messages in thread From: Ferruh Yigit @ 2018-04-18 17:43 UTC (permalink / raw) To: Pavan Nikhilesh, thomas, jerin.jacob, techboard; +Cc: dev On 4/18/2018 4:30 PM, Pavan Nikhilesh wrote: > Add macro to mark a variable to be mostly read only and place it in a > separate section. > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> > --- > > Group together mostly read only data to avoid cacheline bouncing, also > useful for auditing purposes. > > lib/librte_eal/common/include/rte_common.h | 5 +++++ > 1 file changed, 5 insertions(+) > > diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h > index 6c5bc5a76..f2ff2e9e6 100644 > --- a/lib/librte_eal/common/include/rte_common.h > +++ b/lib/librte_eal/common/include/rte_common.h > @@ -114,6 +114,11 @@ static void __attribute__((constructor(prio), used)) func(void) > */ > #define __rte_noinline __attribute__((noinline)) > > +/** > + * Mark a variable to be mostly read only and place it in a separate section. > + */ > +#define __rte_read_mostly __attribute__((__section__(".read_mostly"))) Hi Pavan, Is the section ".read_mostly" treated specially [1] or is this just for grouping symbols together (to reduce cacheline bouncing)? [1] If this is special section, can you please point counter part in the kernel? > + > /*********** Macros for pointer arithmetic ********/ > > /** > -- > 2.17.0 > ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only 2018-04-18 17:43 ` [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only Ferruh Yigit @ 2018-04-18 17:55 ` Pavan Nikhilesh 2018-04-18 18:03 ` Ferruh Yigit 0 siblings, 1 reply; 10+ messages in thread From: Pavan Nikhilesh @ 2018-04-18 17:55 UTC (permalink / raw) To: Ferruh Yigit, thomas, jerin.jacob, techboard; +Cc: dev On Wed, Apr 18, 2018 at 06:43:11PM +0100, Ferruh Yigit wrote: > On 4/18/2018 4:30 PM, Pavan Nikhilesh wrote: > > Add macro to mark a variable to be mostly read only and place it in a > > separate section. > > > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> > > --- > > > > Group together mostly read only data to avoid cacheline bouncing, also > > useful for auditing purposes. > > > > lib/librte_eal/common/include/rte_common.h | 5 +++++ > > 1 file changed, 5 insertions(+) > > > > diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h > > index 6c5bc5a76..f2ff2e9e6 100644 > > --- a/lib/librte_eal/common/include/rte_common.h > > +++ b/lib/librte_eal/common/include/rte_common.h > > @@ -114,6 +114,11 @@ static void __attribute__((constructor(prio), used)) func(void) > > */ > > #define __rte_noinline __attribute__((noinline)) > > > > +/** > > + * Mark a variable to be mostly read only and place it in a separate section. > > + */ > > +#define __rte_read_mostly __attribute__((__section__(".read_mostly"))) > Hi Ferruh, > Hi Pavan, > > Is the section ".read_mostly" treated specially [1] or is this just for grouping > symbols together (to reduce cacheline bouncing)? The section .read_mostly is not treated specially it's just for grouping symbols. > > [1] > If this is special section, can you please point counter part in the kernel? The kernel has something similar[1] but they have a custom linker script to arrange symbols. [1] https://github.com/torvalds/linux/blob/a27fc14219f2e3c4a46ba9177b04d9b52c875532/arch/x86/include/asm/cache.h#L11 kernel commit id 54cb27a71f51d304342c79e62fd7667f2171062b > > > > + > > /*********** Macros for pointer arithmetic ********/ > > > > /** > > -- > > 2.17.0 > > > ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only 2018-04-18 17:55 ` Pavan Nikhilesh @ 2018-04-18 18:03 ` Ferruh Yigit 2018-04-19 9:20 ` Pavan Nikhilesh 0 siblings, 1 reply; 10+ messages in thread From: Ferruh Yigit @ 2018-04-18 18:03 UTC (permalink / raw) To: Pavan Nikhilesh, thomas, jerin.jacob, techboard; +Cc: dev On 4/18/2018 6:55 PM, Pavan Nikhilesh wrote: > On Wed, Apr 18, 2018 at 06:43:11PM +0100, Ferruh Yigit wrote: >> On 4/18/2018 4:30 PM, Pavan Nikhilesh wrote: >>> Add macro to mark a variable to be mostly read only and place it in a >>> separate section. >>> >>> Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> >>> --- >>> >>> Group together mostly read only data to avoid cacheline bouncing, also >>> useful for auditing purposes. >>> >>> lib/librte_eal/common/include/rte_common.h | 5 +++++ >>> 1 file changed, 5 insertions(+) >>> >>> diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h >>> index 6c5bc5a76..f2ff2e9e6 100644 >>> --- a/lib/librte_eal/common/include/rte_common.h >>> +++ b/lib/librte_eal/common/include/rte_common.h >>> @@ -114,6 +114,11 @@ static void __attribute__((constructor(prio), used)) func(void) >>> */ >>> #define __rte_noinline __attribute__((noinline)) >>> >>> +/** >>> + * Mark a variable to be mostly read only and place it in a separate section. >>> + */ >>> +#define __rte_read_mostly __attribute__((__section__(".read_mostly"))) >> > > Hi Ferruh, > >> Hi Pavan, >> >> Is the section ".read_mostly" treated specially [1] or is this just for grouping >> symbols together (to reduce cacheline bouncing)? > > The section .read_mostly is not treated specially it's just for grouping > symbols. I have encounter with a blog post claiming this is not working: " The problem with the above approach is that once all the __read_mostly variables are grouped into one section, the remaining "non-read-mostly" variables end-up together too. This increases the chances that two frequently used elements (in the "non-read-mostly" region) will end-up competing for the same position (or cache-line, the basic fixed-sized block for memory<-->cache transfers) in the cache. Thus frequent accesses will cause excessive cache thrashing on that particular cache-line thereby degrading the overall system performance. " https://thecodeartist.blogspot.com/2011/12/why-readmostly-does-not-work-as-it.html > >> >> [1] >> If this is special section, can you please point counter part in the kernel? > > The kernel has something similar[1] but they have a custom linker script to > arrange symbols. > > [1] https://github.com/torvalds/linux/blob/a27fc14219f2e3c4a46ba9177b04d9b52c875532/arch/x86/include/asm/cache.h#L11 > kernel commit id 54cb27a71f51d304342c79e62fd7667f2171062b > >> >> >>> + >>> /*********** Macros for pointer arithmetic ********/ >>> >>> /** >>> -- >>> 2.17.0 >>> >> ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only 2018-04-18 18:03 ` Ferruh Yigit @ 2018-04-19 9:20 ` Pavan Nikhilesh 2018-04-19 12:09 ` Bruce Richardson 0 siblings, 1 reply; 10+ messages in thread From: Pavan Nikhilesh @ 2018-04-19 9:20 UTC (permalink / raw) To: Ferruh Yigit, thomas, jerin.jacob, techboard; +Cc: dev On Wed, Apr 18, 2018 at 07:03:06PM +0100, Ferruh Yigit wrote: > On 4/18/2018 6:55 PM, Pavan Nikhilesh wrote: > > On Wed, Apr 18, 2018 at 06:43:11PM +0100, Ferruh Yigit wrote: > >> On 4/18/2018 4:30 PM, Pavan Nikhilesh wrote: > >>> Add macro to mark a variable to be mostly read only and place it in a > >>> separate section. > >>> > >>> Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> > >>> --- > >>> > >>> Group together mostly read only data to avoid cacheline bouncing, also > >>> useful for auditing purposes. > >>> > >>> lib/librte_eal/common/include/rte_common.h | 5 +++++ > >>> 1 file changed, 5 insertions(+) > >>> > >>> diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h > >>> index 6c5bc5a76..f2ff2e9e6 100644 > >>> --- a/lib/librte_eal/common/include/rte_common.h > >>> +++ b/lib/librte_eal/common/include/rte_common.h > >>> @@ -114,6 +114,11 @@ static void __attribute__((constructor(prio), used)) func(void) > >>> */ > >>> #define __rte_noinline __attribute__((noinline)) > >>> > >>> +/** > >>> + * Mark a variable to be mostly read only and place it in a separate section. > >>> + */ > >>> +#define __rte_read_mostly __attribute__((__section__(".read_mostly"))) > >> > > > > Hi Ferruh, > > > >> Hi Pavan, > >> > >> Is the section ".read_mostly" treated specially [1] or is this just for grouping > >> symbols together (to reduce cacheline bouncing)? > > > > The section .read_mostly is not treated specially it's just for grouping > > symbols. > > I have encounter with a blog post claiming this is not working: > > " > The problem with the above approach is that once all the __read_mostly variables > are grouped into one section, the remaining "non-read-mostly" variables end-up > together too. This increases the chances that two frequently used elements (in > the "non-read-mostly" region) will end-up competing for the same position (or > cache-line, the basic fixed-sized block for memory<-->cache transfers) in the > cache. Thus frequent accesses will cause excessive cache thrashing on that > particular cache-line thereby degrading the overall system performance. > " > > https://thecodeartist.blogspot.com/2011/12/why-readmostly-does-not-work-as-it.html > The author is concerned about processors with less cache set-associativity, almost all modern processors have >= 16 way set associativity. And the above issue can happen even now when two frequently written global variables are placed next to each other. Currently, we don't have much control over how the global variables are arranged and a single addition/deletion to the global variables causes change in alignment and in some cases minor performance regression. Tagging them as __read_mostly we can easily identify the alignment changes across builds by comparing map files global variable section. I have verified the patch-set on arm64 (16-way set-associative) and didn't notice any performance regression. Did you have a chance to verify if there is any performance regression? > > > >> > >> [1] > >> If this is special section, can you please point counter part in the kernel? > > > > The kernel has something similar[1] but they have a custom linker script to > > arrange symbols. > > > > [1] https://github.com/torvalds/linux/blob/a27fc14219f2e3c4a46ba9177b04d9b52c875532/arch/x86/include/asm/cache.h#L11 > > kernel commit id 54cb27a71f51d304342c79e62fd7667f2171062b > > > >> > >> > >>> + > >>> /*********** Macros for pointer arithmetic ********/ > >>> > >>> /** > >>> -- > >>> 2.17.0 > >>> > >> > ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only 2018-04-19 9:20 ` Pavan Nikhilesh @ 2018-04-19 12:09 ` Bruce Richardson 2018-04-19 15:18 ` Pavan Nikhilesh 0 siblings, 1 reply; 10+ messages in thread From: Bruce Richardson @ 2018-04-19 12:09 UTC (permalink / raw) To: Pavan Nikhilesh; +Cc: Ferruh Yigit, thomas, jerin.jacob, techboard, dev On Thu, Apr 19, 2018 at 02:50:52PM +0530, Pavan Nikhilesh wrote: > On Wed, Apr 18, 2018 at 07:03:06PM +0100, Ferruh Yigit wrote: > > On 4/18/2018 6:55 PM, Pavan Nikhilesh wrote: > > > On Wed, Apr 18, 2018 at 06:43:11PM +0100, Ferruh Yigit wrote: > > >> On 4/18/2018 4:30 PM, Pavan Nikhilesh wrote: > > >>> Add macro to mark a variable to be mostly read only and place it in a > > >>> separate section. > > >>> > > >>> Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> > > >>> --- > > >>> > > >>> Group together mostly read only data to avoid cacheline bouncing, also > > >>> useful for auditing purposes. > > >>> > > >>> lib/librte_eal/common/include/rte_common.h | 5 +++++ > > >>> 1 file changed, 5 insertions(+) > > >>> > > >>> diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h > > >>> index 6c5bc5a76..f2ff2e9e6 100644 > > >>> --- a/lib/librte_eal/common/include/rte_common.h > > >>> +++ b/lib/librte_eal/common/include/rte_common.h > > >>> @@ -114,6 +114,11 @@ static void __attribute__((constructor(prio), used)) func(void) > > >>> */ > > >>> #define __rte_noinline __attribute__((noinline)) > > >>> > > >>> +/** > > >>> + * Mark a variable to be mostly read only and place it in a separate section. > > >>> + */ > > >>> +#define __rte_read_mostly __attribute__((__section__(".read_mostly"))) > > >> > > > > > > Hi Ferruh, > > > > > >> Hi Pavan, > > >> > > >> Is the section ".read_mostly" treated specially [1] or is this just for grouping > > >> symbols together (to reduce cacheline bouncing)? > > > > > > The section .read_mostly is not treated specially it's just for grouping > > > symbols. > > > > I have encounter with a blog post claiming this is not working: > > > > " > > The problem with the above approach is that once all the __read_mostly variables > > are grouped into one section, the remaining "non-read-mostly" variables end-up > > together too. This increases the chances that two frequently used elements (in > > the "non-read-mostly" region) will end-up competing for the same position (or > > cache-line, the basic fixed-sized block for memory<-->cache transfers) in the > > cache. Thus frequent accesses will cause excessive cache thrashing on that > > particular cache-line thereby degrading the overall system performance. > > " > > > > https://thecodeartist.blogspot.com/2011/12/why-readmostly-does-not-work-as-it.html > > > > The author is concerned about processors with less cache set-associativity, > almost all modern processors have >= 16 way set associativity. And the above > issue can happen even now when two frequently written global variables are > placed next to each other. > > Currently, we don't have much control over how the global variables are > arranged and a single addition/deletion to the global variables causes change > in alignment and in some cases minor performance regression. > Tagging them as __read_mostly we can easily identify the alignment changes > across builds by comparing map files global variable section. > > I have verified the patch-set on arm64 (16-way set-associative) and didn't > notice any performance regression. > Did you have a chance to verify if there is any performance regression? > Is there a performance improvement? It's seems a relatively strange change to me, so I'd like to know that it really improves performance in test cases. /Bruce ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only 2018-04-19 12:09 ` Bruce Richardson @ 2018-04-19 15:18 ` Pavan Nikhilesh 2018-04-19 15:37 ` Bruce Richardson 0 siblings, 1 reply; 10+ messages in thread From: Pavan Nikhilesh @ 2018-04-19 15:18 UTC (permalink / raw) To: Bruce Richardson, Ferruh Yigit, thomas, jerin.jacob, techboard, dev; +Cc: dev On Thu, Apr 19, 2018 at 01:09:58PM +0100, Bruce Richardson wrote: > On Thu, Apr 19, 2018 at 02:50:52PM +0530, Pavan Nikhilesh wrote: > > On Wed, Apr 18, 2018 at 07:03:06PM +0100, Ferruh Yigit wrote: > > > On 4/18/2018 6:55 PM, Pavan Nikhilesh wrote: > > > > On Wed, Apr 18, 2018 at 06:43:11PM +0100, Ferruh Yigit wrote: > > > >> On 4/18/2018 4:30 PM, Pavan Nikhilesh wrote: > > > >>> Add macro to mark a variable to be mostly read only and place it in a > > > >>> separate section. > > > >>> > > > >>> Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> > > > >>> --- > > > >>> > > > >>> Group together mostly read only data to avoid cacheline bouncing, also > > > >>> useful for auditing purposes. > > > >>> > > > >>> lib/librte_eal/common/include/rte_common.h | 5 +++++ > > > >>> 1 file changed, 5 insertions(+) > > > >>> > > > >>> diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h > > > >>> index 6c5bc5a76..f2ff2e9e6 100644 > > > >>> --- a/lib/librte_eal/common/include/rte_common.h > > > >>> +++ b/lib/librte_eal/common/include/rte_common.h > > > >>> @@ -114,6 +114,11 @@ static void __attribute__((constructor(prio), used)) func(void) > > > >>> */ > > > >>> #define __rte_noinline __attribute__((noinline)) > > > >>> > > > >>> +/** > > > >>> + * Mark a variable to be mostly read only and place it in a separate section. > > > >>> + */ > > > >>> +#define __rte_read_mostly __attribute__((__section__(".read_mostly"))) > > > >> > > > > > > > > Hi Ferruh, > > > > > > > >> Hi Pavan, > > > >> > > > >> Is the section ".read_mostly" treated specially [1] or is this just for grouping > > > >> symbols together (to reduce cacheline bouncing)? > > > > > > > > The section .read_mostly is not treated specially it's just for grouping > > > > symbols. > > > > > > I have encounter with a blog post claiming this is not working: > > > > > > " > > > The problem with the above approach is that once all the __read_mostly variables > > > are grouped into one section, the remaining "non-read-mostly" variables end-up > > > together too. This increases the chances that two frequently used elements (in > > > the "non-read-mostly" region) will end-up competing for the same position (or > > > cache-line, the basic fixed-sized block for memory<-->cache transfers) in the > > > cache. Thus frequent accesses will cause excessive cache thrashing on that > > > particular cache-line thereby degrading the overall system performance. > > > " > > > > > > https://thecodeartist.blogspot.com/2011/12/why-readmostly-does-not-work-as-it.html > > > > > > > The author is concerned about processors with less cache set-associativity, > > almost all modern processors have >= 16 way set associativity. And the above > > issue can happen even now when two frequently written global variables are > > placed next to each other. > > > > Currently, we don't have much control over how the global variables are > > arranged and a single addition/deletion to the global variables causes change > > in alignment and in some cases minor performance regression. > > Tagging them as __read_mostly we can easily identify the alignment changes > > across builds by comparing map files global variable section. > > > > I have verified the patch-set on arm64 (16-way set-associative) and didn't > > notice any performance regression. > > Did you have a chance to verify if there is any performance regression? > > > Is there a performance improvement? It's seems a relatively strange change > to me, so I'd like to know that it really improves performance in test > cases. We had a performance regression of ~200k between 17.11 and 18.02 due enabling dpaa/dpaa2 in default config this was due to new global variables being added and changing the alignment. Moving read mostly global variables (logtypes/device arrays) to a separate section helps when tracking performance regression between builds. > > /Bruce ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only 2018-04-19 15:18 ` Pavan Nikhilesh @ 2018-04-19 15:37 ` Bruce Richardson 2018-04-19 15:55 ` Pavan Nikhilesh 0 siblings, 1 reply; 10+ messages in thread From: Bruce Richardson @ 2018-04-19 15:37 UTC (permalink / raw) To: Pavan Nikhilesh; +Cc: Ferruh Yigit, thomas, jerin.jacob, techboard, dev On Thu, Apr 19, 2018 at 08:48:33PM +0530, Pavan Nikhilesh wrote: > On Thu, Apr 19, 2018 at 01:09:58PM +0100, Bruce Richardson wrote: > > On Thu, Apr 19, 2018 at 02:50:52PM +0530, Pavan Nikhilesh wrote: > > > On Wed, Apr 18, 2018 at 07:03:06PM +0100, Ferruh Yigit wrote: > > > > On 4/18/2018 6:55 PM, Pavan Nikhilesh wrote: > > > > > On Wed, Apr 18, 2018 at 06:43:11PM +0100, Ferruh Yigit wrote: > > > > >> On 4/18/2018 4:30 PM, Pavan Nikhilesh wrote: > > > > >>> Add macro to mark a variable to be mostly read only and place it in a > > > > >>> separate section. > > > > >>> > > > > >>> Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> > > > > >>> --- > > > > >>> > > > > >>> Group together mostly read only data to avoid cacheline bouncing, also > > > > >>> useful for auditing purposes. > > > > >>> > > > > >>> lib/librte_eal/common/include/rte_common.h | 5 +++++ > > > > >>> 1 file changed, 5 insertions(+) > > > > >>> > > > > >>> diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h > > > > >>> index 6c5bc5a76..f2ff2e9e6 100644 > > > > >>> --- a/lib/librte_eal/common/include/rte_common.h > > > > >>> +++ b/lib/librte_eal/common/include/rte_common.h > > > > >>> @@ -114,6 +114,11 @@ static void __attribute__((constructor(prio), used)) func(void) > > > > >>> */ > > > > >>> #define __rte_noinline __attribute__((noinline)) > > > > >>> > > > > >>> +/** > > > > >>> + * Mark a variable to be mostly read only and place it in a separate section. > > > > >>> + */ > > > > >>> +#define __rte_read_mostly __attribute__((__section__(".read_mostly"))) > > > > >> > > > > > > > > > > Hi Ferruh, > > > > > > > > > >> Hi Pavan, > > > > >> > > > > >> Is the section ".read_mostly" treated specially [1] or is this just for grouping > > > > >> symbols together (to reduce cacheline bouncing)? > > > > > > > > > > The section .read_mostly is not treated specially it's just for grouping > > > > > symbols. > > > > > > > > I have encounter with a blog post claiming this is not working: > > > > > > > > " > > > > The problem with the above approach is that once all the __read_mostly variables > > > > are grouped into one section, the remaining "non-read-mostly" variables end-up > > > > together too. This increases the chances that two frequently used elements (in > > > > the "non-read-mostly" region) will end-up competing for the same position (or > > > > cache-line, the basic fixed-sized block for memory<-->cache transfers) in the > > > > cache. Thus frequent accesses will cause excessive cache thrashing on that > > > > particular cache-line thereby degrading the overall system performance. > > > > " > > > > > > > > https://thecodeartist.blogspot.com/2011/12/why-readmostly-does-not-work-as-it.html > > > > > > > > > > The author is concerned about processors with less cache set-associativity, > > > almost all modern processors have >= 16 way set associativity. And the above > > > issue can happen even now when two frequently written global variables are > > > placed next to each other. > > > > > > Currently, we don't have much control over how the global variables are > > > arranged and a single addition/deletion to the global variables causes change > > > in alignment and in some cases minor performance regression. > > > Tagging them as __read_mostly we can easily identify the alignment changes > > > across builds by comparing map files global variable section. > > > > > > I have verified the patch-set on arm64 (16-way set-associative) and didn't > > > notice any performance regression. > > > Did you have a chance to verify if there is any performance regression? > > > > > Is there a performance improvement? It's seems a relatively strange change > > to me, so I'd like to know that it really improves performance in test > > cases. > > We had a performance regression of ~200k between 17.11 and 18.02 due enabling > dpaa/dpaa2 in default config this was due to new global variables being added > and changing the alignment. > Moving read mostly global variables (logtypes/device arrays) to a separate > section helps when tracking performance regression between builds. > So it's of use when debugging, rather than providing a performance boost in and of itself, right? If performance regressions are appearing, should we then see about marking globals with __rte_cache_align to force them all onto difference cachelines? ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only 2018-04-19 15:37 ` Bruce Richardson @ 2018-04-19 15:55 ` Pavan Nikhilesh 0 siblings, 0 replies; 10+ messages in thread From: Pavan Nikhilesh @ 2018-04-19 15:55 UTC (permalink / raw) To: Bruce Richardson, Ferruh Yigit, thomas, jerin.jacob, techboard; +Cc: dev On Thu, Apr 19, 2018 at 04:37:23PM +0100, Bruce Richardson wrote: > On Thu, Apr 19, 2018 at 08:48:33PM +0530, Pavan Nikhilesh wrote: > > On Thu, Apr 19, 2018 at 01:09:58PM +0100, Bruce Richardson wrote: > > > On Thu, Apr 19, 2018 at 02:50:52PM +0530, Pavan Nikhilesh wrote: > > > > On Wed, Apr 18, 2018 at 07:03:06PM +0100, Ferruh Yigit wrote: > > > > > On 4/18/2018 6:55 PM, Pavan Nikhilesh wrote: > > > > > > On Wed, Apr 18, 2018 at 06:43:11PM +0100, Ferruh Yigit wrote: > > > > > >> On 4/18/2018 4:30 PM, Pavan Nikhilesh wrote: > > > > > >>> Add macro to mark a variable to be mostly read only and place it in a > > > > > >>> separate section. > > > > > >>> > > > > > >>> Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> > > > > > >>> --- > > > > > >>> > > > > > >>> Group together mostly read only data to avoid cacheline bouncing, also > > > > > >>> useful for auditing purposes. > > > > > >>> > > > > > >>> lib/librte_eal/common/include/rte_common.h | 5 +++++ > > > > > >>> 1 file changed, 5 insertions(+) > > > > > >>> > > > > > >>> diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h > > > > > >>> index 6c5bc5a76..f2ff2e9e6 100644 > > > > > >>> --- a/lib/librte_eal/common/include/rte_common.h > > > > > >>> +++ b/lib/librte_eal/common/include/rte_common.h > > > > > >>> @@ -114,6 +114,11 @@ static void __attribute__((constructor(prio), used)) func(void) > > > > > >>> */ > > > > > >>> #define __rte_noinline __attribute__((noinline)) > > > > > >>> > > > > > >>> +/** > > > > > >>> + * Mark a variable to be mostly read only and place it in a separate section. > > > > > >>> + */ > > > > > >>> +#define __rte_read_mostly __attribute__((__section__(".read_mostly"))) > > > > > >> > > > > > > > > > > > > Hi Ferruh, > > > > > > > > > > > >> Hi Pavan, > > > > > >> > > > > > >> Is the section ".read_mostly" treated specially [1] or is this just for grouping > > > > > >> symbols together (to reduce cacheline bouncing)? > > > > > > > > > > > > The section .read_mostly is not treated specially it's just for grouping > > > > > > symbols. > > > > > > > > > > I have encounter with a blog post claiming this is not working: > > > > > > > > > > " > > > > > The problem with the above approach is that once all the __read_mostly variables > > > > > are grouped into one section, the remaining "non-read-mostly" variables end-up > > > > > together too. This increases the chances that two frequently used elements (in > > > > > the "non-read-mostly" region) will end-up competing for the same position (or > > > > > cache-line, the basic fixed-sized block for memory<-->cache transfers) in the > > > > > cache. Thus frequent accesses will cause excessive cache thrashing on that > > > > > particular cache-line thereby degrading the overall system performance. > > > > > " > > > > > > > > > > https://thecodeartist.blogspot.com/2011/12/why-readmostly-does-not-work-as-it.html > > > > > > > > > > > > > The author is concerned about processors with less cache set-associativity, > > > > almost all modern processors have >= 16 way set associativity. And the above > > > > issue can happen even now when two frequently written global variables are > > > > placed next to each other. > > > > > > > > Currently, we don't have much control over how the global variables are > > > > arranged and a single addition/deletion to the global variables causes change > > > > in alignment and in some cases minor performance regression. > > > > Tagging them as __read_mostly we can easily identify the alignment changes > > > > across builds by comparing map files global variable section. > > > > > > > > I have verified the patch-set on arm64 (16-way set-associative) and didn't > > > > notice any performance regression. > > > > Did you have a chance to verify if there is any performance regression? > > > > > > > Is there a performance improvement? It's seems a relatively strange change > > > to me, so I'd like to know that it really improves performance in test > > > cases. > > > > We had a performance regression of ~200k between 17.11 and 18.02 due enabling > > dpaa/dpaa2 in default config this was due to new global variables being added > > and changing the alignment. > > Moving read mostly global variables (logtypes/device arrays) to a separate > > section helps when tracking performance regression between builds. > > > So it's of use when debugging, rather than providing a performance boost in > and of itself, right? > > If performance regressions are appearing, should we then see about marking > globals with __rte_cache_align to force them all onto difference > cachelines? I think that would be a bit of overkill considering the number of logtype variables. Currently there are 29 global variables not found in any map file List of globals ("['fw_file', 'mode_8023ad_ports', 'ecore_mz_count', 'igb_filter_rss_list', " "'crc16_ccitt_pmull', 'igb_filter_flex_list', 'rte_crypto_devices', " "'igb_flow_list', 'qman_clk', 'bman_ccsr_map', 'dpaa_portal_key', " "'bman_ip_rev', 'cons_filter', 'rte_rawdevices', 'internal_config', " "'bman_pool_max', 'qman_version', 'qman_ccsr_map', 'rte_event_devices', " "'qman_ip_rev', 'netcfg_interface', 'igb_filter_ntuple_list', " "'igb_filter_ethertype_list', 'skeldev_init_once', 'igb_filter_syn_list', " "'crc32_eth_pmull', 'global_portals_used', 'virtio_hw_internal', " "'vhost_devices']") These are stats without including the logtype variables across all drivers. With logtype variables included there are 70+. ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2018-04-19 15:55 UTC | newest] Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2018-04-18 15:30 [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only Pavan Nikhilesh 2018-04-18 15:30 ` [dpdk-dev] [PATCH 2/2] drivers: mark logtype variables as read mostly Pavan Nikhilesh 2018-04-18 17:43 ` [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only Ferruh Yigit 2018-04-18 17:55 ` Pavan Nikhilesh 2018-04-18 18:03 ` Ferruh Yigit 2018-04-19 9:20 ` Pavan Nikhilesh 2018-04-19 12:09 ` Bruce Richardson 2018-04-19 15:18 ` Pavan Nikhilesh 2018-04-19 15:37 ` Bruce Richardson 2018-04-19 15:55 ` Pavan Nikhilesh
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).