From: Linhaifeng <haifeng.lin@huawei.com>
To: Ravi Kerur <rkerur@gmail.com>, <dev@dpdk.org>
Subject: Re: [dpdk-dev] [PATCH v2] Implement memcmp using AVX/SSE instructions.
Date: Tue, 12 May 2015 16:13:09 +0800 [thread overview]
Message-ID: <5551B615.5060405@huawei.com> (raw)
In-Reply-To: <1431119989-32124-1-git-send-email-rkerur@gmail.com>
Hi, Ravi Kerur
On 2015/5/9 5:19, Ravi Kerur wrote:
> Preliminary results on Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz, Ubuntu
> 14.04 x86_64 shows comparisons using AVX/SSE instructions taking 1/3rd
> CPU ticks for 16, 32, 48 and 64 bytes comparison. In addition,
I wrote a program to test rte_memcmp and I have a question about the result.
Why do the 128, 256, 512, 1024 and 1500 byte cases all cost roughly the same
number of CPU ticks? Is there a problem in my test?
[root@localhost test]# gcc avx_test.c -O3 -I /data/linhf/v2r2c00/open-source/dpdk/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/ -mavx2 -DRTE_MACHINE_CPUFLAG_AVX2
[root@localhost test]# ./a.out 0
each test run 100000000 times
copy 16 bytes costs average 7(rte_memcmp) 10(memcmp) ticks
copy 32 bytes costs average 9(rte_memcmp) 11(memcmp) ticks
copy 64 bytes costs average 6(rte_memcmp) 13(memcmp) ticks
copy 128 bytes costs average 11(rte_memcmp) 14(memcmp) ticks
copy 256 bytes costs average 9(rte_memcmp) 14(memcmp) ticks
copy 512 bytes costs average 9(rte_memcmp) 14(memcmp) ticks
copy 1024 bytes costs average 9(rte_memcmp) 14(memcmp) ticks
copy 1500 bytes costs average 11(rte_memcmp) 14(memcmp) ticks
[root@localhost test]# ./a.out 1
each test run 100000000 times
copy 16 bytes costs average 2(rte_memcpy) 10(memcpy) ticks
copy 32 bytes costs average 2(rte_memcpy) 10(memcpy) ticks
copy 64 bytes costs average 3(rte_memcpy) 10(memcpy) ticks
copy 128 bytes costs average 7(rte_memcpy) 12(memcpy) ticks
copy 256 bytes costs average 9(rte_memcpy) 23(memcpy) ticks
copy 512 bytes costs average 14(rte_memcpy) 34(memcpy) ticks
copy 1024 bytes costs average 37(rte_memcpy) 61(memcpy) ticks
copy 1500 bytes costs average 62(rte_memcpy) 87(memcpy) ticks
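One guess (I am not sure): each rte_memcmp call in my timing loop is
independent of the previous one, so the out-of-order pipeline may overlap
them, and the average then reflects throughput rather than latency, which
would flatten out once the buffers fit in L1 cache. Below is a minimal sketch
of a serialized loop I could try instead; bench_memcmp_serialized is my own
name, and it uses plain memcmp and GCC's x86intrin.h:

        #include <stdint.h>
        #include <string.h>
        #include <x86intrin.h>          /* __rdtsc(), _mm_lfence() */

        /* Chain each result into the buffer read by the next call, so
         * the calls cannot overlap in the pipeline and we measure
         * latency instead of throughput. */
        static uint64_t bench_memcmp_serialized(uint8_t *a, uint8_t *b,
                                                size_t n, uint64_t iters)
        {
                uint64_t i, start, end;
                volatile int sink = 0;

                start = __rdtsc();
                for (i = 0; i < iters; i++) {
                        int r = memcmp(a, b, n);
                        a[0] ^= (uint8_t)r; /* data dependency for next call */
                        sink += r;
                }
                _mm_lfence();           /* don't read the TSC early */
                end = __rdtsc();
                (void)sink;
                return (end - start) / iters;
        }

If the flat numbers really come from pipelining, this version should scale
with n.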
Here is my program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <rte_cycles.h>
#include <smmintrin.h>
#include <rte_memcpy.h>
#include <rte_memcmp.h>

#define TIMES 100000000L

void test_memcpy(size_t n)
{
        uint64_t start, end, i, start2, end2;
        uint8_t *src, *dst;

        src = (uint8_t *)malloc(n * sizeof(uint8_t));
        dst = (uint8_t *)malloc(n * sizeof(uint8_t));
        /* initialize the buffers so we copy real data,
         * not freshly mapped (possibly lazy) pages */
        memset(src, 0xa5, n);
        memset(dst, 0, n);

        start = rte_rdtsc();
        for (i = 0; i < TIMES; i++) {
                rte_memcpy(dst, src, n);
        }
        end = rte_rdtsc();

        start2 = rte_rdtsc();
        for (i = 0; i < TIMES; i++) {
                memcpy(dst, src, n);
        }
        end2 = rte_rdtsc();

        free(src);
        free(dst);

        printf("copy %zu bytes costs average %"PRIu64"(rte_memcpy) %"PRIu64"(memcpy) ticks\n",
               n, (end - start) / TIMES, (end2 - start2) / TIMES);
}

int test_memcmp(size_t n)
{
        uint64_t start, end, i, start2, end2;
        uint8_t *src, *dst;
        int *ret;
        int t = 0;

        src = (uint8_t *)malloc(n * sizeof(uint8_t));
        dst = (uint8_t *)malloc(n * sizeof(uint8_t));
        ret = (int *)malloc(TIMES * sizeof(int));
        /* equal buffers force a full-length comparison */
        memset(src, 0xa5, n);
        memset(dst, 0xa5, n);

        start = rte_rdtsc();
        for (i = 0; i < TIMES; i++) {
                ret[i] = rte_memcmp(dst, src, n);
        }
        end = rte_rdtsc();

        start2 = rte_rdtsc();
        for (i = 0; i < TIMES; i++) {
                ret[i] = memcmp(dst, src, n);
        }
        end2 = rte_rdtsc();

        /* consume the results so gcc cannot optimize the memcmp calls away */
        for (i = 0; i < TIMES; i++) {
                t += ret[i];
        }

        free(src);
        free(dst);
        free(ret);

        printf("copy %zu bytes costs average %"PRIu64"(rte_memcmp) %"PRIu64"(memcmp) ticks\n",
               n, (end - start) / TIMES, (end2 - start2) / TIMES);
        return t;
}

int main(int narg, char **args)
{
        printf("each test run %llu times\n", (unsigned long long)TIMES);
        if (narg < 2) {
                printf("usage: ./avx_test 0/1  1: test memcpy  0: test memcmp\n");
                return -1;
        }
        if (atoi(args[1])) {
                test_memcpy(16);
                test_memcpy(32);
                test_memcpy(64);
                test_memcpy(128);
                test_memcpy(256);
                test_memcpy(512);
                test_memcpy(1024);
                test_memcpy(1500);
        } else {
                test_memcmp(16);
                test_memcmp(32);
                test_memcmp(64);
                test_memcmp(128);
                test_memcmp(256);
                test_memcmp(512);
                test_memcmp(1024);
                test_memcmp(1500);
        }
        return 0;
}
Thread overview: 21+ messages
2015-05-08 21:19 [dpdk-dev] [PATCH v2] Implement rte_memcmp with " Ravi Kerur
2015-05-08 21:19 ` [dpdk-dev] [PATCH v2] Implement memcmp using " Ravi Kerur
2015-05-08 22:29 ` Matt Laswell
2015-05-08 22:54 ` Ravi Kerur
2015-05-08 23:25 ` Matt Laswell
2015-05-11 9:51 ` Ananyev, Konstantin
2015-05-11 17:42 ` Ravi Kerur
[not found] ` <2601191342CEEE43887BDE71AB9772582142E44A@irsmsx105.ger.corp.intel.com>
2015-05-11 19:35 ` Ananyev, Konstantin
2015-05-11 20:46 ` Ravi Kerur
2015-05-11 22:29 ` Don Provan
2015-05-13 1:16 ` Ravi Kerur
2015-05-13 9:03 ` Bruce Richardson
2015-05-13 20:08 ` Ravi Kerur
2015-05-13 12:21 ` Jay Rolette
2015-05-13 20:07 ` Ravi Kerur
[not found] ` <2601191342CEEE43887BDE71AB9772582142EBB5@irsmsx105.ger.corp.intel.com>
2015-05-13 10:12 ` Ananyev, Konstantin
2015-05-13 20:06 ` Ravi Kerur
2015-05-12 8:13 ` Linhaifeng [this message]
2015-05-13 1:18 ` Ravi Kerur
2015-05-13 7:22 ` Linhaifeng
2015-05-13 20:00 ` Ravi Kerur