From: Stephen Hemminger <stephen@networkplumber.org>
To: "Ruifeng Wang (Arm Technology China)" <Ruifeng.Wang@arm.com>
Cc: "bruce.richardson@intel.com" <bruce.richardson@intel.com>,
"vladimir.medvedkin@intel.com" <vladimir.medvedkin@intel.com>,
"dev@dpdk.org" <dev@dpdk.org>,
Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>,
"Gavin Hu (Arm Technology China)" <Gavin.Hu@arm.com>,
nd <nd@arm.com>
Subject: Re: [dpdk-dev] [PATCH v3 1/3] lib/lpm: not inline unnecessary functions
Date: Thu, 27 Jun 2019 21:34:50 -0700 [thread overview]
Message-ID: <20190627213450.30082af6@hermes.lan> (raw)
In-Reply-To: <AM0PR08MB441863618D66286668D4969B9EFC0@AM0PR08MB4418.eurprd08.prod.outlook.com>
On Fri, 28 Jun 2019 02:44:54 +0000
"Ruifeng Wang (Arm Technology China)" <Ruifeng.Wang@arm.com> wrote:
> >
> > > Tests showed that the function inlining caused performance drop on
> > > some x86 platforms with the memory ordering patches applied.
> > > By force no-inline functions, the performance was better than before
> > > on x86 and no impact to arm64 platforms.
> > >
> > > Suggested-by: Medvedkin Vladimir <vladimir.medvedkin@intel.com>
> > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> > {
> >
> > Do you actually need to force noinline or is just taking of inline enough?
> > In general, letting compiler decide is often best practice.
>
> The force noinline is an optimization for x86 platforms to keep rte_lpm_add() API
> performance with memory ordering applied.
I don't think you answered my question. What does a recent version of GCC do
if you drop the inline.
Actually all the functions in rte_lpm should drop inline.
diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
index 6b7b28a2e431..ffe07e980864 100644
--- a/lib/librte_lpm/rte_lpm.c
+++ b/lib/librte_lpm/rte_lpm.c
@@ -399,7 +399,7 @@ MAP_STATIC_SYMBOL(void rte_lpm_free(struct rte_lpm *lpm),
* are stored in the rule table from 0 - 31.
* NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
*/
-static inline int32_t
+static int32_t
rule_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
uint8_t next_hop)
{
@@ -471,7 +471,7 @@ rule_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
return rule_index;
}
-static inline int32_t
+static int32_t
rule_add_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
uint32_t next_hop)
{
@@ -547,7 +547,7 @@ rule_add_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
* Delete a rule from the rule table.
* NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
*/
-static inline void
+static void
rule_delete_v20(struct rte_lpm_v20 *lpm, int32_t rule_index, uint8_t depth)
{
int i;
@@ -570,7 +570,7 @@ rule_delete_v20(struct rte_lpm_v20 *lpm, int32_t rule_index, uint8_t depth)
lpm->rule_info[depth - 1].used_rules--;
}
-static inline void
+static void
rule_delete_v1604(struct rte_lpm *lpm, int32_t rule_index, uint8_t depth)
{
int i;
@@ -597,7 +597,7 @@ rule_delete_v1604(struct rte_lpm *lpm, int32_t rule_index, uint8_t depth)
* Finds a rule in rule table.
* NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
*/
-static inline int32_t
+static int32_t
rule_find_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth)
{
uint32_t rule_gindex, last_rule, rule_index;
@@ -618,7 +618,7 @@ rule_find_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth)
return -EINVAL;
}
-static inline int32_t
+static int32_t
rule_find_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth)
{
uint32_t rule_gindex, last_rule, rule_index;
@@ -642,7 +642,7 @@ rule_find_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth)
/*
* Find, clean and allocate a tbl8.
*/
-static inline int32_t
+static int32_t
tbl8_alloc_v20(struct rte_lpm_tbl_entry_v20 *tbl8)
{
uint32_t group_idx; /* tbl8 group index. */
@@ -669,7 +669,7 @@ tbl8_alloc_v20(struct rte_lpm_tbl_entry_v20 *tbl8)
return -ENOSPC;
}
-static inline int32_t
+static int32_t
tbl8_alloc_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t number_tbl8s)
{
uint32_t group_idx; /* tbl8 group index. */
@@ -709,7 +709,7 @@ tbl8_free_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t tbl8_group_start)
tbl8[tbl8_group_start].valid_group = INVALID;
}
-static inline int32_t
+static int32_t
add_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
uint8_t next_hop)
{
@@ -777,7 +777,7 @@ add_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
return 0;
}
-static inline int32_t
+static int32_t
add_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
uint32_t next_hop)
{
@@ -846,7 +846,7 @@ add_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
return 0;
}
-static inline int32_t
+static int32_t
add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
uint8_t next_hop)
{
@@ -971,7 +971,7 @@ add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
return 0;
}
-static inline int32_t
+static int32_t
add_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
uint32_t next_hop)
{
@@ -1244,7 +1244,7 @@ BIND_DEFAULT_SYMBOL(rte_lpm_is_rule_present, _v1604, 16.04);
MAP_STATIC_SYMBOL(int rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip,
uint8_t depth, uint32_t *next_hop), rte_lpm_is_rule_present_v1604);
-static inline int32_t
+static int32_t
find_previous_rule_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
uint8_t *sub_rule_depth)
{
@@ -1266,7 +1266,7 @@ find_previous_rule_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
return -1;
}
-static inline int32_t
+static int32_t
find_previous_rule_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
uint8_t *sub_rule_depth)
{
@@ -1288,7 +1288,7 @@ find_previous_rule_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
return -1;
}
-static inline int32_t
+static int32_t
delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
{
@@ -1381,7 +1381,7 @@ delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
return 0;
}
-static inline int32_t
+static int32_t
delete_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
{
@@ -1483,7 +1483,7 @@ delete_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
* Return of value > -1 means tbl8 is in use but has all the same values and
* thus can be recycled
*/
-static inline int32_t
+static int32_t
tbl8_recycle_check_v20(struct rte_lpm_tbl_entry_v20 *tbl8,
uint32_t tbl8_group_start)
{
@@ -1530,7 +1530,7 @@ tbl8_recycle_check_v20(struct rte_lpm_tbl_entry_v20 *tbl8,
return -EINVAL;
}
-static inline int32_t
+static int32_t
tbl8_recycle_check_v1604(struct rte_lpm_tbl_entry *tbl8,
uint32_t tbl8_group_start)
{
@@ -1577,7 +1577,7 @@ tbl8_recycle_check_v1604(struct rte_lpm_tbl_entry *tbl8,
return -EINVAL;
}
-static inline int32_t
+static int32_t
delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
{
@@ -1655,7 +1655,7 @@ delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
return 0;
}
-static inline int32_t
+static int32_t
delete_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
{
next prev parent reply other threads:[~2019-06-28 4:35 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-27 9:37 Ruifeng Wang
2019-06-27 9:37 ` [dpdk-dev] [PATCH v3 2/3] lib/lpm: memory orderings to avoid race conditions for v1604 Ruifeng Wang
2019-06-27 9:37 ` [dpdk-dev] [PATCH v3 3/3] lib/lpm: memory orderings to avoid race conditions for v20 Ruifeng Wang
2019-06-28 13:33 ` Medvedkin, Vladimir
2019-06-29 17:35 ` Honnappa Nagarahalli
2019-07-05 13:45 ` Alex Kiselev
2019-07-05 16:56 ` Medvedkin, Vladimir
2019-07-01 7:08 ` Ruifeng Wang (Arm Technology China)
2019-06-27 15:24 ` [dpdk-dev] [PATCH v3 1/3] lib/lpm: not inline unnecessary functions Stephen Hemminger
2019-06-28 2:44 ` Ruifeng Wang (Arm Technology China)
2019-06-28 4:34 ` Stephen Hemminger [this message]
2019-06-28 5:48 ` Ruifeng Wang (Arm Technology China)
2019-06-28 13:47 ` Medvedkin, Vladimir
2019-06-28 13:57 ` Honnappa Nagarahalli
2019-06-28 14:16 ` Medvedkin, Vladimir
2019-06-28 15:35 ` Stephen Hemminger
2019-07-01 6:44 ` Ruifeng Wang (Arm Technology China)
2019-07-05 10:40 ` Medvedkin, Vladimir
2019-07-05 10:58 ` Ruifeng Wang (Arm Technology China)
2019-07-05 10:31 ` Medvedkin, Vladimir
2019-07-05 13:37 ` Alex Kiselev
2019-07-05 16:53 ` Medvedkin, Vladimir
2019-06-28 13:38 ` Medvedkin, Vladimir
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190627213450.30082af6@hermes.lan \
--to=stephen@networkplumber.org \
--cc=Gavin.Hu@arm.com \
--cc=Honnappa.Nagarahalli@arm.com \
--cc=Ruifeng.Wang@arm.com \
--cc=bruce.richardson@intel.com \
--cc=dev@dpdk.org \
--cc=nd@arm.com \
--cc=vladimir.medvedkin@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).