xorshiftr_wide/
lib.rs

1const DEFAULT_SHL_FIRST: bool = false;
2const DEFAULT_SHL: u32 = 17;
3const DEFAULT_SHR: u32 = 23;
4
5/// A prng designed for autovectorized filling of buffers with random bits,
6/// built from several lanes of individual modified 'xorshiftR+' prngs.
7///
8/// The `LANES` const generic's default of 16 compiles well on x86-64 with either
9/// 128-bit or 256-bit SIMD registers available. Tweaks may squeeze out higher
10/// performance on other architectures, but much lower values produce lower
11/// quality output.
12#[derive(Clone, Copy, Debug)]
13pub struct XorshiftrWide<const LANES: usize = 16> {
14    state: [[u64; LANES]; 2],
15}
16impl<const LANES: usize> XorshiftrWide<LANES> {
17    /// Reseeds an existing prng instance, seeded with a source of randomness provided by the user.
18    pub fn reseed(&mut self, mut provide_random_u64: impl FnMut() -> u64) {
19        while {
20            for arr in &mut self.state {
21                for ptr in arr {
22                    *ptr = provide_random_u64();
23                }
24            }
25            let mut need_to_reseed = false;
26            // Check if any lane's two state u64s are the same
27            // In testing on smaller bit widths, abnormally short periods included a state with both the same
28            // In the rare event that we have two of the same u64 in a lane, we should reseed
29            for i in 0..LANES {
30                need_to_reseed |= self.state[0][i] == self.state[1][i];
31            }
32            // Check if any two lanes' entire states are the same
33            for left in 0..LANES {
34                for right in (left + 1)..LANES {
35                    let top_same = self.state[0][left] == self.state[0][right];
36                    let bottom_same = self.state[1][left] == self.state[1][right];
37                    let either_same = top_same | bottom_same;
38                    need_to_reseed |= either_same;
39                }
40            }
41            need_to_reseed
42        } {
43            // It's astronomically unlikely that we ever need to repeat seeding here,
44            // so avoiding branches during the above checks and dropping a cold hint seems reasonable.
45            cold();
46        }
47    }
48    /// Creates a new prng instance, seeded with a source of randomness provided by the user.
49    pub fn new(provide_random_u64: impl FnMut() -> u64) -> Self {
50        let mut new_state = Self {
51            state: [[0; LANES]; 2],
52        };
53        new_state.reseed(provide_random_u64);
54        new_state
55    }
56    /// Core function to fill a buffer with random data.
57    /// This function is generic over the shift parameters to allow for
58    /// different configurations of the algorithm.
59    #[inline(never)]
60    fn fill_core<const SHL_FIRST: bool, const SHL_BY: u32, const SHR_BY: u32>(
61        &mut self,
62        buffer: &mut [u64],
63    ) {
64        let mut exact_width_chunks = buffer.chunks_exact_mut(LANES);
65        for chunk in exact_width_chunks.by_ref() {
66            let chunk_as_array: &mut [u64; LANES] = unsafe { chunk.try_into().unwrap_unchecked() };
67            for i in 0..LANES {
68                let mut x = self.state[0][i];
69                let y = self.state[1][i];
70                self.state[0][i] = y;
71                if const { SHL_FIRST } {
72                    x ^= x << SHL_BY;
73                    x ^= x >> SHR_BY;
74                } else {
75                    x ^= x >> SHR_BY;
76                    x ^= x << SHL_BY;
77                }
78                self.state[1][i] = x.wrapping_add(y);
79                chunk_as_array[i] = x;
80            }
81        }
82        let tail = exact_width_chunks.into_remainder();
83        if !tail.is_empty() {
84            cold();
85            let mut temporary_buffer = [0u64; LANES];
86            self.fill_core::<SHL_FIRST, SHL_BY, SHR_BY>(&mut temporary_buffer[..]);
87            let qty_to_copy = tail.len();
88            let random_bits_to_copy = &temporary_buffer[..qty_to_copy];
89            tail.copy_from_slice(random_bits_to_copy);
90        }
91    }
92    /// Fills a slice of u64 with random data.
93    #[inline]
94    pub fn fill_u64_buffer(&mut self, destination_buffer: &mut [u64]) {
95        self.fill_core::<DEFAULT_SHL_FIRST, DEFAULT_SHL, DEFAULT_SHR>(destination_buffer);
96    }
97}
98
99pub(crate) use private_utils::*;
100pub(crate) mod private_utils {
101    #[inline(always)]
102    #[cold]
103    pub(crate) fn cold() {}
104}