From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-ie0-f171.google.com (mail-ie0-f171.google.com [209.85.223.171]) by dpdk.org (Postfix) with ESMTP id 335E437A6 for ; Sat, 28 Mar 2015 01:02:28 +0100 (CET) Received: by iecvj10 with SMTP id vj10so82510078iec.0 for ; Fri, 27 Mar 2015 17:02:27 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:mime-version:in-reply-to:references:date :message-id:subject:from:to:cc:content-type; bh=sqEnGbE0ez1xoiFfvgYhqgLjdM3CEavdB+BxSpav7Ls=; b=IUY0bKu/8eAsUB4NC86vOqNEoLEM8foFkK4p2y7o+byD7Nwl2OFPC8BTyGfGC3nmqj CO3mWXS/Ly5mr3ZjLitQQfr6ynDeaGlxoQQrbos6gGeKXzbgw7tlXp5u7JMHDnqvc7V+ PoVmYEA0xu9CUkprhtukeSAeIfg3IAFgBAHLCCPsLBPyqd2wIrqQW9XegWzLkVoN6IZW 59eX73k05YN6/tGvbC0NAfy/S5KOOTWqxLWLQGC8js+b8tCHhCa28QyF/Wu+jkn8ZtQl XXsP72lZRw8t/tMRa8zpFAbZDTamMI3IePNGkuKaSQJA2W843vn7d20ah2O7/nXflOHT kwSQ== X-Gm-Message-State: ALoCoQlgtJL15O0ORv1paflWfVdA4zPl3xZpX5dRpPRf1HnFrHOAJwt7w1IwqGV5E2apPa+smrgI MIME-Version: 1.0 X-Received: by 10.107.26.195 with SMTP id a186mr32705821ioa.78.1427500947567; Fri, 27 Mar 2015 17:02:27 -0700 (PDT) Received: by 10.64.58.129 with HTTP; Fri, 27 Mar 2015 17:02:27 -0700 (PDT) In-Reply-To: References: <1426609081-47774-1-git-send-email-rsanford@akamai.com> Date: Fri, 27 Mar 2015 17:02:27 -0700 Message-ID: From: Stephen Hemminger To: "Sanford, Robert" Content-Type: text/plain; charset=UTF-8 X-Content-Filtered-By: Mailman/MimeDel 2.1.15 Cc: "dev@dpdk.org" Subject: Re: [dpdk-dev] [RFC PATCH] eal: rte_rand yields only 62 random bits X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 28 Mar 2015 00:02:28 -0000 Please do this work upstream in glibc rather than in the corner case of DPDK. On Fri, Mar 27, 2015 at 9:38 AM, Sanford, Robert wrote: > Never mind ... I cancel the previous suggestion. After further reading on > RNGs, I believe we should abandon the use of lrand48() and implement our > own RNG based on the so-called KISS family of RNGs originally proposed by > the late George Marsaglia. In his excellent paper, "Good Practice in > (Pseudo) Random Number Generation for Bioinformatics Applications", David > Jones (UCL Bioinformatics Group) describes a few variants of KISS > generators. This paper, and Robert G. Brown's (Duke Univ.) comprehensive > "Dieharder" random number test suite work show that KISS RNGs are simple > and fast, yet high quality. > > Something like JLKISS64(), with state kept in TLS, would be ideal for DPDK > use. In limited experiments, I found JLKISS64() (not inlined, compiles to > ~40 instructions) to be ~4 times faster than rte_rand(). This is probably > because JLKISS64() achieves integer-instruction parallelism, while > rte_rand(), with its two calls to lrand48(), nrand48_r(), and > __drand48_iterate(), does not (and all those calls!). > > Here is the JLKISS64() function, as it appears in Jones's GoodPracticeRNG > paper: > > --- > > > > > > > > > > /* Public domain code for JLKISS64 > RNG - long period KISS RNG > producing 64-bit results */ > > unsigned long long x = > 123456789123ULL,y = 987654321987ULL; /* Seed > variables */ > unsigned int z1 = 43219876, c1 = 6543217, z2 = 21987643, c2 = 1732654; /* > Seed variables */ > > unsigned long long JLKISS64() > { > > unsigned long long t; > > x = 1490024343005336237ULL * x + > 123456789; > y ^= y << 21; y ^= y >> 17; y ^= y << 30; /* Do not set y=0! */ > t = 4294584393ULL * z1 + c1; c1 = t >> 32; z1 = t; > t = 4246477509ULL * z2 + c2; c2 = t >> 32; z2 = t; > > return x + y + z1 + ((unsigned > long long)z2 << 32); /* Return 64-bit > result */ > } > > > > > > > > > -- > Regards, > Robert > > > > >The implementation of rte_rand() returns only 62 bits of > >pseudo-randomness, because the underlying calls to lrand48() > >"return non-negative long integers uniformly distributed > >between 0 and 2^31." > > > >We have written a potential fix, but before we spend more > >time testing and refining it, I wanted to check with you > >guys. > > > >We switched to using the reentrant versions of [ls]rand48, > >and maintain per-lcore state. We need ~2.06 calls to > >lrand48_r(), per call to rte_rand(). > > > >Do you agree with the approach we've taken in this patch? > > > > > >Thanks, > >Robert > > > >--- > > lib/librte_eal/common/include/rte_random.h | 33 > >+++++++++++++++++++++++++-- > > lib/librte_eal/linuxapp/eal/eal_thread.c | 7 ++++++ > > 2 files changed, 37 insertions(+), 3 deletions(-) > > > >diff --git a/lib/librte_eal/common/include/rte_random.h > >b/lib/librte_eal/common/include/rte_random.h > >index 24ae836..b9248cd 100644 > >--- a/lib/librte_eal/common/include/rte_random.h > >+++ b/lib/librte_eal/common/include/rte_random.h > >@@ -46,6 +46,17 @@ extern "C" { > > > > #include > > #include > >+#include > >+#include > >+ > >+struct rte_rand_data { > >+ struct drand48_data _dr48; > >+ uint32_t _hi_bits; > >+ uint8_t _bits_left; > >+}; > >+ > >+RTE_DECLARE_PER_LCORE(struct rte_rand_data, _rand_data); > >+ > > > > /** > > * Seed the pseudo-random generator. > >@@ -60,7 +71,7 @@ extern "C" { > > static inline void > > rte_srand(uint64_t seedval) > > { > >- srand48((long unsigned int)seedval); > >+ srand48_r((long unsigned int)seedval, > &RTE_PER_LCORE(_rand_data)._dr48); > > } > > > > /** > >@@ -76,10 +87,26 @@ rte_srand(uint64_t seedval) > > static inline uint64_t > > rte_rand(void) > > { > >+ struct rte_rand_data *rd = &RTE_PER_LCORE(_rand_data); > > uint64_t val; > >- val = lrand48(); > >+ uint32_t hi_bits; > >+ long int result; > >+ > >+ if (unlikely(rd->_bits_left < 2)) { > >+ lrand48_r(&rd->_dr48, &result); > >+ rd->_hi_bits |= (uint32_t)result << (1 - rd->_bits_left); > >+ rd->_bits_left += 31; > >+ } > >+ > >+ hi_bits = rd->_hi_bits; > >+ lrand48_r(&rd->_dr48, &result); > >+ val = (uint32_t)result | (hi_bits & 0x8000000); > > val <<= 32; > >- val += lrand48(); > >+ hi_bits <<= 1; > >+ lrand48_r(&rd->_dr48, &result); > >+ val |= (uint32_t)result | (hi_bits & 0x8000000); > >+ rd->_hi_bits = hi_bits << 1; > >+ rd->_bits_left -= 2; > > return val; > > } > > > >diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c > >b/lib/librte_eal/linuxapp/eal/eal_thread.c > >index 5635c7d..08e7f72 100644 > >--- a/lib/librte_eal/linuxapp/eal/eal_thread.c > >+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c > >@@ -52,6 +52,8 @@ > > #include > > #include > > #include > >+#include > >+#include > > > > #include "eal_private.h" > > #include "eal_thread.h" > >@@ -59,6 +61,8 @@ > > RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY; > > RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY; > > RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset); > >+RTE_DEFINE_PER_LCORE(struct rte_rand_data, _rand_data); > >+ > > > > /* > > * Send a message to a slave lcore identified by slave_id to call a > >@@ -147,6 +151,9 @@ eal_thread_loop(__attribute__((unused)) void *arg) > > /* set the lcore ID in per-lcore memory area */ > > RTE_PER_LCORE(_lcore_id) = lcore_id; > > > >+ /* seed per-lcore PRNG */ > >+ rte_srand(rte_rdtsc()); > >+ > > /* set CPU affinity */ > > if (eal_thread_set_affinity() < 0) > > rte_panic("cannot set affinity\n"); > >-- > >1.7.1 > > > >