Skip to main content

ferray_random/bitgen/
mod.rs

1// ferray-random: BitGenerator trait and implementations
2
3use ferray_core::FerrayError;
4
5mod mt19937;
6mod pcg64;
7mod pcg64dxsm;
8mod philox;
9mod seed_sequence;
10mod sfc64;
11mod xoshiro256;
12
13/// `SplitMix64`: a fast 64-bit hash-based PRNG used exclusively for
14/// seeding other generators (Xoshiro256**, PCG64) from a single u64
15/// seed. The function was previously copy-pasted into both
16/// `pcg64.rs` and `xoshiro256.rs` (#259).
17pub(crate) const fn splitmix64(state: &mut u64) -> u64 {
18    *state = state.wrapping_add(0x9e37_79b9_7f4a_7c15);
19    let mut z = *state;
20    z = (z ^ (z >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9);
21    z = (z ^ (z >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb);
22    z ^ (z >> 31)
23}
24
25pub use mt19937::MT19937;
26pub use pcg64::Pcg64;
27pub use pcg64dxsm::Pcg64Dxsm;
28pub use philox::Philox;
29pub use seed_sequence::SeedSequence;
30pub use sfc64::Sfc64;
31pub use xoshiro256::Xoshiro256StarStar;
32
33/// Trait for pluggable pseudo-random number generators.
34///
35/// All `BitGenerators` are `Send` (can be transferred between threads) but NOT `Sync`
36/// (they are stateful and require `&mut self`).
37///
38/// Concrete implementations: [`Pcg64`], [`Pcg64Dxsm`], [`Philox`],
39/// [`Xoshiro256StarStar`], [`MT19937`], [`Sfc64`].
40pub trait BitGenerator: Send {
41    /// Generate the next 64-bit unsigned integer.
42    fn next_u64(&mut self) -> u64;
43
44    /// Create a new generator seeded from a single `u64`.
45    fn seed_from_u64(seed: u64) -> Self
46    where
47        Self: Sized;
48
49    /// Advance the generator state by a large step (2^128 for Xoshiro256**).
50    ///
51    /// Returns `Some(())` if jump is supported, `None` otherwise.
52    /// After calling `jump`, the generator's state has advanced as if
53    /// `2^128` calls to `next_u64` had been made.
54    fn jump(&mut self) -> Option<()>;
55
56    /// Create a new generator from a seed and a stream ID.
57    ///
58    /// Returns `Some(Self)` if the generator supports stream-based parallelism
59    /// (e.g., Philox), `None` otherwise.
60    fn stream(seed: u64, stream_id: u64) -> Option<Self>
61    where
62        Self: Sized;
63
64    /// Generate a uniformly distributed `f64` in [0, 1).
65    ///
66    /// Uses the upper 53 bits of `next_u64()` for full double precision.
67    fn next_f64(&mut self) -> f64 {
68        (self.next_u64() >> 11) as f64 * (1.0 / (1u64 << 53) as f64)
69    }
70
71    /// Generate a uniformly distributed `f32` in [0, 1).
72    fn next_f32(&mut self) -> f32 {
73        (self.next_u64() >> 40) as f32 * (1.0 / (1u64 << 24) as f32)
74    }
75
76    /// Fill a byte slice with random bytes.
77    fn fill_bytes(&mut self, dest: &mut [u8]) {
78        let mut i = 0;
79        while i + 8 <= dest.len() {
80            let val = self.next_u64();
81            dest[i..i + 8].copy_from_slice(&val.to_le_bytes());
82            i += 8;
83        }
84        if i < dest.len() {
85            let val = self.next_u64();
86            let bytes = val.to_le_bytes();
87            for (j, byte) in dest[i..].iter_mut().enumerate() {
88                *byte = bytes[j];
89            }
90        }
91    }
92
93    /// Serialize the full internal state of this generator to a
94    /// little-endian byte vector. Pair with [`set_state_bytes`] to
95    /// restore.
96    ///
97    /// Default impl returns an error — concrete generators that
98    /// support serialization override both methods. Used to checkpoint
99    /// reproducible experiments (#453), the rough Rust equivalent of
100    /// numpy's `Generator.bit_generator.state` / `__getstate__` /
101    /// `__setstate__`.
102    ///
103    /// # Errors
104    /// `FerrayError::InvalidValue` if the generator does not implement
105    /// state serialization.
106    fn state_bytes(&self) -> Result<Vec<u8>, FerrayError> {
107        Err(FerrayError::invalid_value(
108            "this BitGenerator does not implement state_bytes",
109        ))
110    }
111
112    /// Restore the generator's state from previously-serialized bytes
113    /// produced by [`state_bytes`].
114    ///
115    /// # Errors
116    /// `FerrayError::InvalidValue` if the byte length doesn't match
117    /// the expected state size, or if the embedded state is invalid
118    /// (e.g. all-zero state for Xoshiro256**).
119    fn set_state_bytes(&mut self, bytes: &[u8]) -> Result<(), FerrayError> {
120        let _ = bytes;
121        Err(FerrayError::invalid_value(
122            "this BitGenerator does not implement set_state_bytes",
123        ))
124    }
125
126    /// Generate a `u64` in the range `[0, bound)` using rejection sampling.
127    fn next_u64_bounded(&mut self, bound: u64) -> u64 {
128        if bound == 0 {
129            return 0;
130        }
131        // Lemire's nearly divisionless method
132        let mut x = self.next_u64();
133        let mut m = (x as u128) * (bound as u128);
134        let mut l = m as u64;
135        if l < bound {
136            let threshold = bound.wrapping_neg() % bound;
137            while l < threshold {
138                x = self.next_u64();
139                m = (x as u128) * (bound as u128);
140                l = m as u64;
141            }
142        }
143        (m >> 64) as u64
144    }
145}
146
147#[cfg(test)]
148mod state_tests {
149    //! Round-trip tests for `state_bytes` / `set_state_bytes` (#453).
150    //!
151    //! For each concrete BitGenerator: capture state, draw N values,
152    //! roll back to the captured state, draw N more — outputs must
153    //! match exactly.
154
155    use super::*;
156
157    fn roundtrip<B: BitGenerator + Sized>(make: impl Fn() -> B, expected_size: usize) {
158        let mut a = make();
159        // Burn a few values so we're not at the seeded initial state.
160        for _ in 0..7 {
161            a.next_u64();
162        }
163        let snapshot = a.state_bytes().unwrap();
164        assert_eq!(
165            snapshot.len(),
166            expected_size,
167            "{} expected state size {expected_size}, got {}",
168            std::any::type_name::<B>(),
169            snapshot.len()
170        );
171
172        let mut from_a: Vec<u64> = Vec::with_capacity(64);
173        for _ in 0..64 {
174            from_a.push(a.next_u64());
175        }
176
177        let mut b = make();
178        b.set_state_bytes(&snapshot).unwrap();
179        let mut from_b: Vec<u64> = Vec::with_capacity(64);
180        for _ in 0..64 {
181            from_b.push(b.next_u64());
182        }
183        assert_eq!(
184            from_a,
185            from_b,
186            "round-trip diverged for {}",
187            std::any::type_name::<B>()
188        );
189    }
190
191    #[test]
192    fn roundtrip_xoshiro256() {
193        roundtrip(|| Xoshiro256StarStar::seed_from_u64(42), 32);
194    }
195
196    #[test]
197    fn roundtrip_sfc64() {
198        roundtrip(|| Sfc64::seed_from_u64(0xc0ffee), 32);
199    }
200
201    #[test]
202    fn roundtrip_pcg64() {
203        roundtrip(|| Pcg64::seed_from_u64(7), 32);
204    }
205
206    #[test]
207    fn roundtrip_pcg64dxsm() {
208        roundtrip(|| Pcg64Dxsm::seed_from_u64(7), 32);
209    }
210
211    #[test]
212    fn roundtrip_philox() {
213        roundtrip(|| Philox::seed_from_u64(123), 44);
214    }
215
216    #[test]
217    fn roundtrip_mt19937() {
218        roundtrip(|| MT19937::seed_from_u64(2026), 312 * 8 + 8);
219    }
220
221    #[test]
222    fn xoshiro_rejects_all_zero_state() {
223        let mut g = Xoshiro256StarStar::seed_from_u64(0);
224        let zeros = vec![0u8; 32];
225        assert!(g.set_state_bytes(&zeros).is_err());
226    }
227
228    #[test]
229    fn pcg64_rejects_even_inc() {
230        let mut g = Pcg64::seed_from_u64(0);
231        let mut bad = vec![0u8; 32];
232        bad[16] = 2; // inc low byte even
233        assert!(g.set_state_bytes(&bad).is_err());
234    }
235
236    #[test]
237    fn philox_rejects_oversize_buf_idx() {
238        let mut g = Philox::seed_from_u64(0);
239        let mut bad = vec![0u8; 44];
240        bad[40] = 5; // buf_idx > 4
241        assert!(g.set_state_bytes(&bad).is_err());
242    }
243
244    #[test]
245    fn wrong_length_returns_error() {
246        let mut g = Xoshiro256StarStar::seed_from_u64(0);
247        assert!(g.set_state_bytes(&[0u8; 16]).is_err());
248    }
249}