ya_rand/
rng.rs

1use crate::util::{as_raw_bytes_mut, wide_mul};
2use core::ptr::swap;
3
4#[cfg(feature = "alloc")]
5use {
6    crate::ya_rand_encoding::Encoder,
7    alloc::{string::String, vec, vec::Vec},
8};
9
10const F64_MANT: u32 = f64::MANTISSA_DIGITS;
11const F32_MANT: u32 = f32::MANTISSA_DIGITS;
12const F64_MAX_PRECISE: u64 = 1 << F64_MANT;
13const F32_MAX_PRECISE: u64 = 1 << F32_MANT;
14const F64_DIVISOR: f64 = F64_MAX_PRECISE as f64;
15const F32_DIVISOR: f32 = F32_MAX_PRECISE as f32;
16pub const ALPHANUMERIC: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
17
18/// Trait for RNGs that provide cryptographically secure data.
19pub trait SecureYARandGenerator: YARandGenerator {
20    /// Fills `dst` with random data, which is safe to be used in cryptographic contexts.
21    ///
22    /// # Examples
23    ///
24    /// ```
25    /// use ya_rand::*;
26    ///
27    /// let mut rng = new_rng_secure();
28    /// let mut data = [0; 1738];
29    /// rng.fill_bytes(&mut data);
30    /// assert!(data.into_iter().any(|v| v != 0));
31    /// ```
32    fn fill_bytes(&mut self, dst: &mut [u8]);
33
34    /// Fills `dst` with random data, which is safe to be used in cryptographic contexts.
35    ///
36    /// Differs from [`SecureYARandGenerator::fill_bytes`] in that the underlying type of `dst`
37    /// doesn't need to be any specific type.
38    ///
39    /// # Examples
40    ///
41    /// ```
42    /// use ya_rand::*;
43    ///
44    /// #[repr(C)]
45    /// #[derive(Clone, Copy, Default, PartialEq, Eq)]
46    /// struct NotAByte {
47    ///     x: u16,
48    ///     y: u32,
49    ///     z: u64,
50    /// }
51    ///
52    /// let mut rng = new_rng_secure();
53    /// let zero_value = NotAByte::default();
54    /// let mut data = [zero_value; 69];
55    /// unsafe {
56    ///     rng.fill_raw(&mut data);
57    /// }
58    /// assert!(data.into_iter().any(|v| v != zero_value));
59    /// ```
60    ///
61    /// # Safety
62    ///
63    /// `T` must be valid as nothing more than a collection of bytes.
64    /// Integer types are the simplest example of this, but structs of integer types
65    /// generally should fall under the same umbrella.
66    #[inline]
67    unsafe fn fill_raw<T>(&mut self, dst: &mut [T]) {
68        // SAFETY: The caller has promised not to be a fucking dumbass.
69        let dst_as_bytes = unsafe { as_raw_bytes_mut(dst) };
70        self.fill_bytes(dst_as_bytes);
71    }
72
73    /// Generates a random `String` with length `len`, using the provided
74    /// `Encoder` to determine character set and minimum secure length. Since
75    /// character sets can only contain valid ascii values, the length of the created
76    /// `String` reprensents both the size of the `String` in bytes, and the
77    /// amount of characters it contains.
78    ///
79    /// Returns `None` when `len` is less than what the encoder declares to be secure.
80    ///
81    /// All provided encoders are accessible through [`crate::ya_rand_encoding`].
82    /// Users wishing to implement their own encoding must do so through the
83    /// [`Encoder`](crate::ya_rand_encoding::Encoder) trait.
84    ///
85    /// Originally inspired by golang's addition of [`rand.Text`] in release 1.24,
86    /// but altered to be encoding/length generic and unbiased for non-trivial bases.
87    ///
88    /// [`rand.Text`]:
89    /// https://cs.opensource.google/go/go/+/refs/tags/go1.24.0:src/crypto/rand/text.go
90    ///
91    /// # Examples
92    ///
93    /// ```
94    /// use ya_rand::*;
95    /// use ya_rand_encoding::Base16Lowercase as B16L;
96    ///
97    /// const LEN: usize = 420;
98    /// let mut rng = new_rng_secure();
99    /// // Safe to unwrap because 420 is greater than 32, which
100    /// // is the minimum secure length of `Base16Lowercase`.
101    /// let hex_string = rng.text::<B16L>(LEN).unwrap();
102    /// assert!(hex_string.len() == LEN);
103    /// assert!(hex_string.bytes().count() == LEN);
104    /// assert!(hex_string.chars().count() == LEN);
105    /// for c in hex_string.bytes() {
106    ///     assert!(
107    ///         (b'0' <= c && c <= b'9') ||
108    ///         (b'a' <= c && c <= b'f')
109    ///     );
110    /// }
111    /// ```
112    #[cfg(feature = "alloc")]
113    fn text<E: Encoder>(&mut self, len: usize) -> Option<String> {
114        match len >= E::MIN_LEN {
115            true => Some({
116                const BYTE_VALUES: usize = 1 << u8::BITS;
117                // TODO: Test if forcing page faulting with non-zero
118                // initializer improves perfermance.
119                let mut data = vec![0; len];
120                // This branch is evaluated at compile time, so concrete
121                // implementations in final binaries will only have the
122                // contents of the branch suitable for the encoder used.
123                if BYTE_VALUES % E::CHARSET.len() == 0 {
124                    self.fill_bytes(&mut data);
125                    // Directly map each random u8 to a character in the set.
126                    // Since this branch is only reachable when length is a power
127                    // of two, the modulo gets optimized out and the whole thing
128                    // gets vectorized. The assembly for this branch is
129                    // absolutely beautiful.
130                    // This approach is extremely efficient, but only produces
131                    // unbiased random sequences when the length of the current
132                    // `CHARSET` is divisible by the amount of possible u8 values,
133                    // which is why we need a fallback approach.
134                    for d in &mut data {
135                        let random_index = *d as usize;
136                        *d = E::CHARSET[random_index % E::CHARSET.len()];
137                    }
138                } else {
139                    // Alternative approach that's potentially much slower,
140                    // but always produces unbiased results.
141                    // The unwrap gets optimized out since rustc can see that
142                    // `E::CHARSET` has a non-zero length.
143                    data.fill_with(|| *self.choose(E::CHARSET).unwrap());
144                }
145                // SAFETY: All provided encoders only use ascii values, and
146                // custom `Encoder` implementations agree to do the same when
147                // implementing the trait.
148                unsafe { String::from_utf8_unchecked(data) }
149            }),
150            false => None,
151        }
152    }
153}
154
155/// Trait for RNGs that can be created from a user-provided seed.
156pub trait SeedableYARandGenerator: YARandGenerator + Default {
157    /// Creates a generator from the output of an internal PRNG,
158    /// which is itself seeded from `seed`.
159    ///
160    /// As a rule: unless you are **absolutely certain** that you need to manually
161    /// seed a generator, you don't.
162    /// Instead, use [`crate::new_rng`] when you need to create a new instance.
163    ///
164    /// If you have a scenario where you really do need a set seed, prefer to use the
165    /// `Default` implementation of the desired generator.
166    ///
167    /// # Examples
168    ///
169    /// ```
170    /// use ya_rand::*;
171    ///
172    /// let mut rng1 = ShiroRng::new_with_seed(0);
173    /// // Default initialization is just a shortcut for explicitly seeding with 0.
174    /// let mut rng2 = ShiroRng::default();
175    /// assert!(rng1 == rng2);
176    ///
177    /// let result1 = rng1.u64();
178    /// let result2 = rng2.u64();
179    /// assert!(result1 == result2);
180    /// ```
181    fn new_with_seed(seed: u64) -> Self;
182}
183
184/// Base trait that all RNGs must implement.
185pub trait YARandGenerator: Sized {
186    /// Creates a generator using randomness provided by the OS.
187    ///
188    /// Unlike [`YARandGenerator::new`], which will panic on failure, `try_new`
189    /// propagates the error-handling responsibility to the user. But the probability
190    /// of your operating systems RNG failing is absurdly low, and in the rare case that it
191    /// does fail, that's not really an issue most users are going to be able to address.
192    ///
193    /// Stick to using [`crate::new_rng`], unless you really need a generator of a
194    /// different type (you probably don't), then use `new` on your desired type.
195    fn try_new() -> Result<Self, getrandom::Error>;
196
197    /// Returns a uniformly distributed `u64` in the interval [0, 2<sup>64</sup>).
198    fn u64(&mut self) -> u64;
199
200    /// Creates a generator using randomness provided by the OS.
201    ///
202    /// It is recommended to use the top-level [`crate::new_rng`] instead
203    /// of calling this function on a specific generator type.
204    ///
205    /// # Safety
206    ///
207    /// This function will panic if your OS rng fails to provide enough entropy.
208    /// But this is extremely unlikely, and unless you're working at the kernel level it's
209    /// not something you should ever be concerned with.
210    ///
211    /// Since Windows 10 this function is infallible, thanks to modern Windows versions adopting
212    /// a user-space cryptographic architecture that can't fail during runtime.
213    ///
214    /// # Examples
215    ///
216    /// ```
217    /// use ya_rand::*;
218    ///
219    /// // Recommended usage
220    /// let mut rng1 = new_rng();
221    /// // More explicit
222    /// let mut rng2 = ShiroRng::new();
223    /// // Even more explicit
224    /// let mut rng3 = Xoshiro256pp::new();
225    /// // Since these are all created using OS entropy, the odds of
226    /// // their initial states colliding is vanishingly small.
227    /// assert!(rng1 != rng2);
228    /// assert!(rng1 != rng3);
229    /// assert!(rng2 != rng3);
230    /// ```
231    #[inline]
232    fn new() -> Self {
233        Self::try_new().expect("retrieving random data from the operating system should never fail")
234    }
235
236    /// Returns a uniformly distributed `usize` in the interval \[0, `usize::MAX`\].
237    #[inline]
238    fn usize(&mut self) -> usize {
239        self.bits(usize::BITS) as usize
240    }
241
242    /// Returns a uniformly distributed `u32` in the interval [0, 2<sup>32</sup>).
243    #[inline]
244    fn u32(&mut self) -> u32 {
245        self.bits(u32::BITS) as u32
246    }
247
248    /// Returns a uniformly distributed `u16` in the interval [0, 2<sup>16</sup>).
249    #[inline]
250    fn u16(&mut self) -> u16 {
251        self.bits(u16::BITS) as u16
252    }
253
254    /// Returns a uniformly distributed `u8` in the interval [0, 2<sup>8</sup>).
255    #[inline]
256    fn u8(&mut self) -> u8 {
257        self.bits(u8::BITS) as u8
258    }
259
260    /// Returns a uniformly distributed `u64` in the interval [0, 2<sup>`bit_count`</sup>).
261    ///
262    /// The value of `bit_count` is clamped to 64.
263    #[inline]
264    fn bits(&mut self, bit_count: u32) -> u64 {
265        self.u64() >> (u64::BITS - bit_count.min(u64::BITS))
266    }
267
268    /// A simple coinflip, returning a `bool` that has a 50% chance of being true.
269    ///
270    /// # Examples
271    ///
272    /// ```
273    /// use ya_rand::*;
274    ///
275    /// const ITERATIONS: u64 = 1 << 24;
276    /// let mut rng = new_rng();
277    /// let mut yes: u64 = 0;
278    /// let mut no: u64 = 0;
279    /// for _ in 0..ITERATIONS {
280    ///     if rng.bool() {
281    ///         yes += 1;
282    ///     } else {
283    ///         no += 1;
284    ///     }
285    /// }
286    /// // We expect the difference to be within ~5%.
287    /// let THRESHOLD: u64 = ITERATIONS / 20;
288    /// assert!(yes.abs_diff(no) <= THRESHOLD);
289    /// ```
290    #[inline]
291    fn bool(&mut self) -> bool {
292        // Compiles to a single "shr 63" instruction.
293        self.bits(1) == 1
294    }
295
296    /// Returns a uniformly distributed `u64` in the interval [0, `max`).
297    ///
298    /// Using [`YARandGenerator::bits`] when `max` happens to be a power of 2
299    /// will be significantly faster.
300    ///
301    /// # Examples
302    ///
303    /// ```
304    /// use ya_rand::*;
305    ///
306    /// let mut rng = new_rng();
307    /// // Special case: bound of 0 always returns 0.
308    /// assert!(rng.bound(0) == 0);
309    /// for i in 1..=2000 {
310    ///     for _ in 0..(i * 2) {
311    ///         assert!(rng.bound(i) < i);
312    ///     }
313    /// }
314    /// ```
315    #[inline]
316    fn bound(&mut self, max: u64) -> u64 {
317        // Lemire's nearly divisionless method: https://arxiv.org/abs/1805.10941.
318        let (mut high, mut low) = wide_mul(self.u64(), max);
319        match low < max {
320            false => {}
321            true => {
322                // The dreaded division.
323                let threshold = max.wrapping_neg() % max;
324                while low < threshold {
325                    (high, low) = wide_mul(self.u64(), max);
326                }
327            }
328        }
329        debug_assert!(
330            (max != 0 && high < max) || high == 0,
331            "BUG: assertion should be unreachable"
332        );
333        high
334    }
335
336    /// Returns a uniformly distributed `u64` in the interval \[0, `max`\].
337    ///
338    /// It is expected that `max` < `u64::MAX`.
339    ///
340    /// # Examples
341    ///
342    /// ```
343    /// use ya_rand::*;
344    ///
345    /// let mut rng = new_rng();
346    /// for i in 0..=2000 {
347    ///     for _ in 0..(i * 2) {
348    ///         assert!(rng.bound_inclusive(i) <= i);
349    ///     }
350    /// }
351    /// ```
352    #[inline]
353    fn bound_inclusive(&mut self, max: u64) -> u64 {
354        self.bound(max + 1)
355    }
356
357    /// Returns a uniformly distributed `i64` in the interval [`min`, `max`)
358    ///
359    /// It is expected that `min` < `max`.
360    #[inline]
361    fn range(&mut self, min: i64, max: i64) -> i64 {
362        let delta = max.abs_diff(min);
363        (self.bound(delta) as i64) + min
364    }
365
366    /// Returns a uniformly distributed `i64` in the interval \[`min`, `max`\]
367    ///
368    /// It is expected that `min` <= `max` and `max` < `u64::MAX`.
369    #[inline]
370    fn range_inclusive(&mut self, min: i64, max: i64) -> i64 {
371        self.range(min, max + 1)
372    }
373
374    /// Returns a uniformly distributed `f64` in the interval [0.0, 1.0).
375    #[inline]
376    fn f64(&mut self) -> f64 {
377        self.bits(F64_MANT) as f64 / F64_DIVISOR
378    }
379
380    /// Returns a uniformly distributed `f32` in the interval [0.0, 1.0).
381    #[inline]
382    fn f32(&mut self) -> f32 {
383        self.bits(F32_MANT) as f32 / F32_DIVISOR
384    }
385
386    /// Returns a uniformly distributed `f64` in the interval (0.0, 1.0].
387    #[inline]
388    fn f64_nonzero(&mut self) -> f64 {
389        // Interval of (0, 2^53]
390        let nonzero = self.bits(F64_MANT) + 1;
391        nonzero as f64 / F64_DIVISOR
392    }
393
394    /// Returns a uniformly distributed `f32` in the interval (0.0, 1.0].
395    #[inline]
396    fn f32_nonzero(&mut self) -> f32 {
397        // Interval of (0, 2^24]
398        let nonzero = self.bits(F32_MANT) + 1;
399        nonzero as f32 / F32_DIVISOR
400    }
401
402    /// Returns a uniformly distributed `f64` in the interval (-1.0, 1.0).
403    #[inline]
404    fn f64_wide(&mut self) -> f64 {
405        // This approach is faster than using Generator::range.
406        const BITS: u32 = F64_MANT + 1;
407        const OFFSET: i64 = F64_MAX_PRECISE as i64;
408        let mut x: i64;
409        loop {
410            // Start with an interval of [0, 2^54)
411            x = self.bits(BITS) as i64;
412            // Interval is now (0, 2^54)
413            match x != 0 {
414                true => break,
415                false => {}
416            }
417        }
418        // Shift interval to (-2^53, 2^53)
419        x -= OFFSET;
420        x as f64 / F64_DIVISOR
421    }
422
423    /// Returns a uniformly distributed `f32` in the interval (-1.0, 1.0).
424    #[inline]
425    fn f32_wide(&mut self) -> f32 {
426        // This approach is faster than using Generator::range.
427        const BITS: u32 = F32_MANT + 1;
428        const OFFSET: i64 = F32_MAX_PRECISE as i64;
429        let mut x: i64;
430        loop {
431            // Start with an interval of [0, 2^25)
432            x = self.bits(BITS) as i64;
433            // Interval is now (0, 2^25)
434            match x != 0 {
435                true => break,
436                false => {}
437            }
438        }
439        // Shift interval to (-2^24, 2^24)
440        x -= OFFSET;
441        x as f32 / F32_DIVISOR
442    }
443
444    /// Returns two indepedent and normally distributed `f64` values with
445    /// a `mean` of `0.0` and a `stddev` of `1.0`.
446    #[cfg(feature = "std")]
447    fn f64_normal(&mut self) -> (f64, f64) {
448        // Marsaglia polar method.
449        // TLDR: It projects a point within the unit
450        // circle onto the unit radius.
451        let mut x: f64;
452        let mut y: f64;
453        let mut s: f64;
454        loop {
455            x = self.f64_wide();
456            y = self.f64_wide();
457            s = (x * x) + (y * y);
458            // Reroll if `s` does not lie **within** the unit circle.
459            match s < 1.0 && s != 0.0 {
460                true => break,
461                false => {}
462            }
463        }
464        let t = (2.0 * s.ln().abs() / s).sqrt();
465        (x * t, y * t)
466    }
467
468    /// Returns two indepedent and normally distributed `f64` values with
469    /// user-defined `mean` and `stddev`.
470    ///
471    /// It is expected that `stddev.abs()` != `0.0`.
472    #[cfg(feature = "std")]
473    #[inline]
474    fn f64_normal_distribution(&mut self, mean: f64, stddev: f64) -> (f64, f64) {
475        let (x, y) = self.f64_normal();
476        (x.mul_add(stddev, mean), y.mul_add(stddev, mean))
477    }
478
479    /// Returns an exponentially distributed `f64` with `lambda` of `1.0`.
480    #[cfg(feature = "std")]
481    #[inline]
482    fn f64_exponential(&mut self) -> f64 {
483        // Using abs() instead of negating the result of ln() to
484        // eliminate the possibility of ever returning -0.0.
485        self.f64_nonzero().ln().abs()
486    }
487
488    /// Returns an exponentially distributed `f64` with user-defined `lambda`.
489    ///
490    /// It is expected that `lambda.abs()` != `0.0`.
491    #[cfg(feature = "std")]
492    #[inline]
493    fn f64_exponential_lambda(&mut self, lambda: f64) -> f64 {
494        self.f64_exponential() / lambda
495    }
496
497    /// Returns a randomly chosen item from the iterator of `collection`.
498    ///
499    /// Returns `None` when the length of the iterator is zero.
500    ///
501    /// # Examples
502    ///
503    /// ```
504    /// use ya_rand::*;
505    ///
506    /// const SIZE: usize = 1738;
507    /// let mut rng = new_rng();
508    /// let mut v = [0; SIZE];
509    /// for i in 0..SIZE {
510    ///     v[i] = i;
511    /// }
512    /// let (top, bottom) = v.split_at(v.len() / 2);
513    ///
514    /// // Sanity check.
515    /// let random_item = rng.choose(&v).expect("vector `v` is not empty");
516    /// assert!(v.contains(random_item));
517    ///
518    /// // Choose `random_item` from the top half of the array.
519    /// let random_item = rng.choose(top).expect("still not empty");
520    /// assert!(top.contains(random_item) == true);
521    ///
522    /// // We're looking in the bottom half so we won't find the
523    /// // `random_item` from the top half.
524    /// assert!(bottom.contains(random_item) == false);
525    /// ```
526    #[inline]
527    fn choose<C>(&mut self, collection: C) -> Option<C::Item>
528    where
529        C: IntoIterator,
530        C::IntoIter: ExactSizeIterator,
531    {
532        let mut iter = collection.into_iter();
533        let len = iter.len();
534        match len != 0 {
535            true => Some({
536                let idx = self.bound(len as u64) as usize;
537                // SAFETY: Since `bound` always returns a value less than
538                // it's input, `nth` will never return `None`.
539                unsafe { iter.nth(idx).unwrap_unchecked() }
540            }),
541            false => None,
542        }
543    }
544
545    /// Returns a randomly selected ASCII character from the pool of:
546    ///
547    /// `'A'..='Z'`, and`'a'..='z'`
548    #[inline]
549    fn ascii_alphabetic(&mut self) -> char {
550        *self.choose(&ALPHANUMERIC[..52]).unwrap() as char
551    }
552
553    /// Returns a randomly selected ASCII character from the pool of:
554    ///
555    /// `'A'..='Z'`
556    #[inline]
557    fn ascii_uppercase(&mut self) -> char {
558        *self.choose(&ALPHANUMERIC[..26]).unwrap() as char
559    }
560
561    /// Returns a randomly selected ASCII character from the pool of:
562    ///
563    /// `'a'..='z'`
564    #[inline]
565    fn ascii_lowercase(&mut self) -> char {
566        *self.choose(&ALPHANUMERIC[26..52]).unwrap() as char
567    }
568
569    /// Returns a randomly selected ASCII character from the pool of:
570    ///
571    /// `'A'..='Z'`, `'a'..='z'`, and `'0'..='9'`
572    #[inline]
573    fn ascii_alphanumeric(&mut self) -> char {
574        *self.choose(&ALPHANUMERIC[..]).unwrap() as char
575    }
576
577    /// Returns a randomly selected ASCII character from the pool of:
578    ///
579    /// `'0'..='9'`
580    #[inline]
581    fn ascii_digit(&mut self) -> char {
582        *self.choose(&ALPHANUMERIC[52..]).unwrap() as char
583    }
584
585    /// Performs a Fisher-Yates shuffle on the contents of `slice`.
586    ///
587    /// This implementation is the modern variant introduced by
588    /// Richard Durstenfeld. It is in-place and O(n).
589    ///
590    /// # Examples
591    ///
592    /// ```
593    /// use ya_rand::*;
594    ///
595    /// let mut rng = new_rng();
596    /// let mut data = [0; 1738];
597    /// for i in 0..data.len() {
598    ///     data[i] = i;
599    /// }
600    /// assert!(data.is_sorted() == true);
601    ///
602    /// rng.shuffle(&mut data);
603    /// assert!(data.is_sorted() == false);
604    /// ```
605    fn shuffle<T>(&mut self, slice: &mut [T]) {
606        let slice_ptr = slice.as_mut_ptr();
607        for i in (1..slice.len()).rev() {
608            let j = self.bound_inclusive(i as u64) as usize;
609            // SAFETY: Index 'i' will always be in bounds because it's
610            // bounded by slice length; index 'j' will always be
611            // in bounds because it's bounded by 'i'.
612            unsafe {
613                swap(slice_ptr.add(i), slice_ptr.add(j));
614            }
615        }
616    }
617
618    /// Clones `slice` into a new `Vec`, performs a Fisher-Yates
619    /// shuffle on it's contents, and returns the result.
620    ///
621    /// See [`YARandGenerator::shuffle`] for details/examples.
622    #[cfg(feature = "alloc")]
623    #[inline]
624    fn shuffle_cloned<T: Clone>(&mut self, slice: &[T]) -> Vec<T> {
625        let mut v = slice.to_vec();
626        self.shuffle(&mut v);
627        v
628    }
629}