Skip to main content

ferray_random/bitgen/
philox.rs

1// ferray-random: Philox 4x32 counter-based BitGenerator
2//
3// Reference: Salmon et al., "Parallel Random Numbers: As Easy as 1, 2, 3",
4// SC '11: Proceedings of the 2011 International Conference for High
5// Performance Computing.
6//
7// Philox 4x32-10: 4 words of 32 bits, 10 rounds.
8// Supports stream IDs natively (the key encodes the stream).
9
10use super::BitGenerator;
11
12/// Philox 4x32-10 counter-based pseudo-random number generator.
13///
14/// This generator natively supports stream IDs, making it ideal for
15/// parallel generation. Each (seed, `stream_id`) pair produces an
16/// independent, non-overlapping output sequence.
17///
18/// # Example
19/// ```
20/// use ferray_random::bitgen::Philox;
21/// use ferray_random::bitgen::BitGenerator;
22///
23/// let mut rng = Philox::seed_from_u64(42);
24/// let val = rng.next_u64();
25/// ```
26pub struct Philox {
27    /// 4x32-bit counter
28    counter: [u32; 4],
29    /// 2x32-bit key (derived from seed and stream)
30    key: [u32; 2],
31    /// Buffered output (4 x u32 = 2 x u64)
32    buffer: [u32; 4],
33    /// Index into buffer (0..4)
34    buf_idx: usize,
35}
36
37// Philox 4x32 round constants
38const PHILOX_M0: u32 = 0xD251_1F53;
39const PHILOX_M1: u32 = 0xCD9E_8D57;
40const PHILOX_W0: u32 = 0x9E37_79B9;
41const PHILOX_W1: u32 = 0xBB67_AE85;
42
43/// Single Philox round: two multiplications with xor-folding.
44///
45/// `key` is taken by reference so the caller's stationary key buffer
46/// stays put across the unrolled round sequence; copying the 8-byte key
47/// every round defeats register allocation in release builds.
48#[inline]
49#[allow(clippy::trivially_copy_pass_by_ref)]
50const fn philox_round(ctr: &mut [u32; 4], key: &[u32; 2]) {
51    let lo0 = ctr[0] as u64 * PHILOX_M0 as u64;
52    let lo1 = ctr[2] as u64 * PHILOX_M1 as u64;
53    let hi0 = (lo0 >> 32) as u32;
54    let lo0 = lo0 as u32;
55    let hi1 = (lo1 >> 32) as u32;
56    let lo1 = lo1 as u32;
57    let new0 = hi1 ^ ctr[1] ^ key[0];
58    let new1 = lo1;
59    let new2 = hi0 ^ ctr[3] ^ key[1];
60    let new3 = lo0;
61    ctr[0] = new0;
62    ctr[1] = new1;
63    ctr[2] = new2;
64    ctr[3] = new3;
65}
66
67/// Bump key between rounds.
68#[inline]
69const fn philox_bump_key(key: &mut [u32; 2]) {
70    key[0] = key[0].wrapping_add(PHILOX_W0);
71    key[1] = key[1].wrapping_add(PHILOX_W1);
72}
73
74/// Full Philox 4x32-10 bijection: 10 rounds.
75const fn philox4x32_10(counter: [u32; 4], key: [u32; 2]) -> [u32; 4] {
76    let mut ctr = counter;
77    let mut k = key;
78    // 10 rounds with key bumps between rounds
79    philox_round(&mut ctr, &k);
80    philox_bump_key(&mut k);
81    philox_round(&mut ctr, &k);
82    philox_bump_key(&mut k);
83    philox_round(&mut ctr, &k);
84    philox_bump_key(&mut k);
85    philox_round(&mut ctr, &k);
86    philox_bump_key(&mut k);
87    philox_round(&mut ctr, &k);
88    philox_bump_key(&mut k);
89    philox_round(&mut ctr, &k);
90    philox_bump_key(&mut k);
91    philox_round(&mut ctr, &k);
92    philox_bump_key(&mut k);
93    philox_round(&mut ctr, &k);
94    philox_bump_key(&mut k);
95    philox_round(&mut ctr, &k);
96    philox_bump_key(&mut k);
97    philox_round(&mut ctr, &k);
98    ctr
99}
100
101impl Philox {
102    /// Increment the 128-bit counter (4 x u32, little-endian).
103    fn increment_counter(&mut self) {
104        for word in &mut self.counter {
105            *word = word.wrapping_add(1);
106            if *word != 0 {
107                return;
108            }
109        }
110    }
111
112    /// Generate the next block of 4 random u32 values.
113    fn generate_block(&mut self) {
114        self.buffer = philox4x32_10(self.counter, self.key);
115        self.increment_counter();
116        self.buf_idx = 0;
117    }
118
119    /// Create a Philox generator with explicit key and starting counter.
120    fn new_with_key(key: [u32; 2], counter: [u32; 4]) -> Self {
121        let mut rng = Self {
122            counter,
123            key,
124            buffer: [0; 4],
125            buf_idx: 4, // Force generation on first call
126        };
127        rng.generate_block();
128        rng
129    }
130}
131
132impl BitGenerator for Philox {
133    fn state_bytes(&self) -> Result<Vec<u8>, ferray_core::FerrayError> {
134        // Layout (little-endian, 44 bytes total):
135        //   counter:  4 × u32 = 16 bytes
136        //   key:      2 × u32 =  8 bytes
137        //   buffer:   4 × u32 = 16 bytes
138        //   buf_idx:      u32 =  4 bytes
139        let mut out = Vec::with_capacity(44);
140        for &w in &self.counter {
141            out.extend_from_slice(&w.to_le_bytes());
142        }
143        for &w in &self.key {
144            out.extend_from_slice(&w.to_le_bytes());
145        }
146        for &w in &self.buffer {
147            out.extend_from_slice(&w.to_le_bytes());
148        }
149        out.extend_from_slice(&(self.buf_idx as u32).to_le_bytes());
150        Ok(out)
151    }
152
153    fn set_state_bytes(&mut self, bytes: &[u8]) -> Result<(), ferray_core::FerrayError> {
154        if bytes.len() != 44 {
155            return Err(ferray_core::FerrayError::invalid_value(format!(
156                "Philox state must be 44 bytes, got {}",
157                bytes.len()
158            )));
159        }
160        let mut counter = [0u32; 4];
161        let mut key = [0u32; 2];
162        let mut buffer = [0u32; 4];
163        for (i, chunk) in bytes[0..16].chunks_exact(4).enumerate() {
164            counter[i] = u32::from_le_bytes(chunk.try_into().unwrap());
165        }
166        for (i, chunk) in bytes[16..24].chunks_exact(4).enumerate() {
167            key[i] = u32::from_le_bytes(chunk.try_into().unwrap());
168        }
169        for (i, chunk) in bytes[24..40].chunks_exact(4).enumerate() {
170            buffer[i] = u32::from_le_bytes(chunk.try_into().unwrap());
171        }
172        let buf_idx = u32::from_le_bytes(bytes[40..44].try_into().unwrap());
173        if buf_idx > 4 {
174            return Err(ferray_core::FerrayError::invalid_value(format!(
175                "Philox buf_idx must be in [0, 4], got {buf_idx}"
176            )));
177        }
178        self.counter = counter;
179        self.key = key;
180        self.buffer = buffer;
181        self.buf_idx = buf_idx as usize;
182        Ok(())
183    }
184
185    fn next_u64(&mut self) -> u64 {
186        if self.buf_idx >= 4 {
187            self.generate_block();
188        }
189        let lo = self.buffer[self.buf_idx] as u64;
190        self.buf_idx += 1;
191        if self.buf_idx >= 4 {
192            self.generate_block();
193        }
194        let hi = self.buffer[self.buf_idx] as u64;
195        self.buf_idx += 1;
196        lo | (hi << 32)
197    }
198
199    fn seed_from_u64(seed: u64) -> Self {
200        let key = [seed as u32, (seed >> 32) as u32];
201        Self::new_with_key(key, [0; 4])
202    }
203
204    fn jump(&mut self) -> Option<()> {
205        // Philox supports jump by advancing the counter by 2^64
206        // (each counter value produces 4 u32 = 2 u64, so this is 2^65 u64 outputs)
207        self.counter[2] = self.counter[2].wrapping_add(1);
208        if self.counter[2] == 0 {
209            self.counter[3] = self.counter[3].wrapping_add(1);
210        }
211        self.buf_idx = 4; // Force re-generation
212        Some(())
213    }
214
215    fn stream(seed: u64, stream_id: u64) -> Option<Self> {
216        // Encode stream_id into the counter's upper 64 bits
217        let key = [seed as u32, (seed >> 32) as u32];
218        let counter = [0u32, 0u32, stream_id as u32, (stream_id >> 32) as u32];
219        Some(Self::new_with_key(key, counter))
220    }
221}
222
223impl Clone for Philox {
224    fn clone(&self) -> Self {
225        Self {
226            counter: self.counter,
227            key: self.key,
228            buffer: self.buffer,
229            buf_idx: self.buf_idx,
230        }
231    }
232}
233
234#[cfg(test)]
235mod tests {
236    use super::*;
237
238    #[test]
239    fn deterministic_output() {
240        let mut rng1 = Philox::seed_from_u64(42);
241        let mut rng2 = Philox::seed_from_u64(42);
242        for _ in 0..1000 {
243            assert_eq!(rng1.next_u64(), rng2.next_u64());
244        }
245    }
246
247    #[test]
248    fn different_seeds_differ() {
249        let mut rng1 = Philox::seed_from_u64(42);
250        let mut rng2 = Philox::seed_from_u64(43);
251        let mut same = true;
252        for _ in 0..100 {
253            if rng1.next_u64() != rng2.next_u64() {
254                same = false;
255                break;
256            }
257        }
258        assert!(!same);
259    }
260
261    #[test]
262    fn stream_support() {
263        let rng0 = Philox::stream(42, 0);
264        let rng1 = Philox::stream(42, 1);
265        assert!(rng0.is_some());
266        assert!(rng1.is_some());
267
268        let mut rng0 = rng0.unwrap();
269        let mut rng1 = rng1.unwrap();
270        // Different streams should produce different output
271        let v0 = rng0.next_u64();
272        let v1 = rng1.next_u64();
273        assert_ne!(v0, v1);
274    }
275
276    #[test]
277    fn jump_support() {
278        let mut rng = Philox::seed_from_u64(42);
279        assert!(rng.jump().is_some());
280    }
281
282    #[test]
283    fn stream_deterministic() {
284        let mut rng1 = Philox::stream(42, 7).unwrap();
285        let mut rng2 = Philox::stream(42, 7).unwrap();
286        for _ in 0..1000 {
287            assert_eq!(rng1.next_u64(), rng2.next_u64());
288        }
289    }
290}