pcg_mwc/
gen64.rs

1// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
2// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
3// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
4// option. This file may not be copied, modified, or distributed
5// except according to those terms.
6
7use core::fmt;
8use rand_core::{Error, le, RngCore, SeedableRng};
9
10#[cfg(feature = "serde1")]
11use serde::{Deserialize, Serialize};
12
13// This is the default multiplier used by MWC.
14const MULTIPLIER: u64 = 0xfeb3_4465_7c0a_f413; //Best spectra for lag 3
15// For testing with a lag of 1, 3, or 4  the following work: 0x7c49_2513_927a_59b3 or 0xa729_8353_f425_0d13
16
17/// A PCG random number generator (MWC X A 256/64 variant).
18///
19/// Permuted Congruential Generator with 256-bit state, internal multiply
20/// with carry Generator, and 64-bit output via a xor and an add.
21#[derive(Clone, PartialEq, Eq)]
22#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
23pub struct Mwc256XXA64 {
24    x1: u64,
25    x2: u64,
26    x3: u64,
27    c: u64,
28}
29
30impl Mwc256XXA64 {
31    /// Construct an instance given two keys.
32    pub fn new(k1: u64, k2: u64) -> Self {
33        // X3 is 0xcafef00dd15ea5e5 (default state from PCG paper because it cannot be 0.
34        // C must be initialized to a value > 1 and < MULTIPLIER
35        Mwc256XXA64::from_state_incr(k1, k2, 0xcafef00dd15ea5e5, 0x14057B7EF767814F)
36    }
37
38    #[inline]
39    fn from_state_incr(x1: u64, x2: u64, x3: u64, c: u64) -> Self {
40        let mut pcg = Mwc256XXA64 { x1, x2, x3, c };
41        //Advance 6 steps to fully mix the keys.
42        pcg.gen6();
43        pcg
44    }
45
46    #[inline]
47    fn gen6(&mut self) -> [u64; 6] {
48        //This is faster than calling `next_u64` 6 times because it avoids the intermediate assignments to the member variables.
49        //For some reason the compiler doesn't figure this out automatically.
50        let mut result = [0; 6];
51        let (low, hi) = multiply(self.x3);
52        result[0] = permute(self.x1, self.x2, self.x3, self.c, low, hi);
53        let (r1, b) = low.overflowing_add(self.c);
54        let c = hi.wrapping_add(b as u64);
55        let (low, hi) = multiply(self.x2);
56        result[1] = permute(r1, self.x1, self.x2, c, low, hi);
57        let (r2, b) = low.overflowing_add(c);
58        let c = hi.wrapping_add(b as u64);
59        let (low, hi) = multiply(self.x1);
60        result[2] = permute(r2, r1, self.x1, c, low, hi);
61        let (r3, b) = low.overflowing_add(c);
62        let c = hi.wrapping_add(b as u64);
63
64        let (low, hi) = multiply(r1);
65        result[3] = permute(r3, r2, r1, c, low, hi);
66        let (r1, b) = low.overflowing_add(c);
67        let c = hi.wrapping_add(b as u64);
68        let (low, hi) = multiply(r2);
69        result[4] = permute(r1, r3, r2, c, low, hi);
70        let (r2, b) = low.overflowing_add(c);
71        let c = hi.wrapping_add(b as u64);
72        let (low, hi) = multiply(r3);
73        result[5] = permute(r2, r1, r3, c, low, hi);
74        let (r3, b) = low.overflowing_add(c);
75        let c = hi.wrapping_add(b as u64);
76
77        self.c = c;
78        self.x1 = r3;
79        self.x2 = r2;
80        self.x3 = r1;
81        return result;
82    }
83
84    #[inline]
85    fn step(&mut self) -> u64 {
86        // prepare the MCG for the next round
87        let (low, hi) = multiply(self.x3);
88        let result = permute(self.x1, self.x2, self.x3, self.c, low, hi);
89        let (x1, b) = low.overflowing_add(self.c);
90        self.x3 = self.x2;
91        self.x2 = self.x1;
92        self.x1 = x1;
93        self.c = hi.wrapping_add(b as u64);
94        result
95    }
96}
97
98#[inline(always)]
99fn multiply(val: u64) -> (u64, u64) {
100    //While this looks like 128 bit math, it compiles to a 64 bit multiply.
101    let t = (val as u128).wrapping_mul(MULTIPLIER as u128);
102    return (t as u64, (t >> 64) as u64);
103}
104
105#[inline(always)]
106fn permute(x1: u64, x2: u64, x3: u64, _c: u64, _low: u64, hi: u64) -> u64 {
107    (x3 ^ x2).wrapping_add(x1 ^ hi)
108}
109
110// Custom Debug implementation that does not expose the internal state
111impl fmt::Debug for Mwc256XXA64 {
112    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
113        write!(f, "Mwc256XXA64 {{}}")
114    }
115}
116
117/// We use a single 249-bit seed to initialise the state and select a stream.
118/// One `seed` bit (lowest bit of `seed[8]`) is ignored.
119impl SeedableRng for Mwc256XXA64 {
120    type Seed = [u8; 32];
121
122    fn from_seed(seed: Self::Seed) -> Self {
123        let mut seed_u64 = [0u64; 4];
124        le::read_u64_into(&seed, &mut seed_u64);
125        // c must be < MULTIPLE and not all 1s or 0s
126        let c = (seed_u64[0] & 0x3ffffffffffffff8) | 5;
127        // X3 must be non-zero and not all 1s, hence we discard 2 bits
128        let x3 = (seed_u64[3] << 2) | 1;
129        Mwc256XXA64::from_state_incr(seed_u64[1], seed_u64[2], x3, c)
130    }
131}
132
133impl RngCore for Mwc256XXA64 {
134    #[inline]
135    fn next_u32(&mut self) -> u32 {
136        self.next_u64() as u32
137    }
138
139    #[inline]
140    fn next_u64(&mut self) -> u64 {
141        self.step()
142    }
143
144    #[inline]
145    fn fill_bytes(&mut self, dest: &mut [u8]) {
146        let mut dest_chunks = dest.chunks_exact_mut(6 * 8);
147        for mut dest_chunk in &mut dest_chunks {
148            for &num in self.gen6().iter() {
149                let (l, r) = dest_chunk.split_at_mut(8);
150                l.copy_from_slice(&num.to_le_bytes());
151                dest_chunk = r;
152            }
153        }
154        for dest_chunk in dest_chunks.into_remainder().chunks_mut(8) {
155            dest_chunk.copy_from_slice(&self.step().to_le_bytes()[..dest_chunk.len()]);
156        }
157    }
158
159    #[inline(always)]
160    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {
161        self.fill_bytes(dest);
162        Ok(())
163    }
164}
165