ya_rand/
rng.rs

1use crate::util::wide_mul;
2use core::ptr::swap;
3use core::slice::from_raw_parts_mut;
4
5#[cfg(feature = "alloc")]
6use {
7    crate::ya_rand_encoding::Encoder,
8    alloc::{boxed::Box, string::String, vec::Vec},
9};
10
11const F64_MANT: u32 = f64::MANTISSA_DIGITS;
12const F32_MANT: u32 = f32::MANTISSA_DIGITS;
13const F64_MAX_PRECISE: u64 = 1 << F64_MANT;
14const F32_MAX_PRECISE: u64 = 1 << F32_MANT;
15const F64_DIVISOR: f64 = F64_MAX_PRECISE as f64;
16const F32_DIVISOR: f32 = F32_MAX_PRECISE as f32;
17pub const ALPHANUMERIC: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
18
19/// Trait for RNGs that provide cryptographically secure data.
20pub trait SecureYARandGenerator: YARandGenerator {
21    /// Fills `dst` with random data, which is safe to be used in cryptographic contexts.
22    ///
23    /// # Examples
24    ///
25    /// ```
26    /// use ya_rand::*;
27    ///
28    /// let mut rng = new_rng_secure();
29    /// let mut data = [0; 1738];
30    /// rng.fill_bytes(&mut data);
31    /// assert!(data.into_iter().any(|v| v != 0));
32    /// ```
33    fn fill_bytes(&mut self, dst: &mut [u8]);
34
35    /// Fills `dst` with random data, which is safe to be used in cryptographic contexts.
36    ///
37    /// Differs from [`SecureYARandGenerator::fill_bytes`] in that the underlying type of `dst`
38    /// doesn't need to be any specific type.
39    ///
40    /// # Examples
41    ///
42    /// ```
43    /// use ya_rand::*;
44    ///
45    /// #[repr(C)]
46    /// #[derive(Clone, Copy, Default, PartialEq, Eq)]
47    /// struct NotAByte {
48    ///     x: u16,
49    ///     y: u32,
50    ///     z: u64,
51    /// }
52    ///
53    /// let mut rng = new_rng_secure();
54    /// let zero_value = NotAByte::default();
55    /// let mut data = [zero_value; 69];
56    /// unsafe {
57    ///     rng.fill_raw(&mut data);
58    /// }
59    /// assert!(data.into_iter().any(|v| v != zero_value));
60    /// ```
61    ///
62    /// # Safety
63    ///
64    /// `T` must be valid as nothing more than a collection of bytes.
65    /// Integer types are the simplest example of this, but structs of integer types
66    /// generally should fall under the same umbrella.
67    #[inline]
68    unsafe fn fill_raw<T>(&mut self, dst: &mut [T]) {
69        // SAFETY: The caller has promised not to be a fucking dumbass.
70        let dst_as_bytes = unsafe {
71            let data = dst.as_mut_ptr().cast();
72            let len = dst.len() * size_of::<T>();
73            from_raw_parts_mut(data, len)
74        };
75        self.fill_bytes(dst_as_bytes);
76    }
77
78    /// Generates a random `String` with length `len`, using the provided
79    /// `Encoder` to determine character set and minimum secure length. Since
80    /// character sets can only contain valid ascii values, the length of the created
81    /// `String` reprensents both the size of the `String` in bytes, and the
82    /// amount of characters it contains.
83    ///
84    /// Returns `None` when `len` is less than what the encoder declares to be secure.
85    ///
86    /// All provided encoders are accessible through [`crate::ya_rand_encoding`].
87    /// Users wishing to implement their own encoding must do so through the
88    /// [`Encoder`](crate::ya_rand_encoding::Encoder) trait.
89    ///
90    /// Originally inspired by golang's addition of [`rand.Text`] in release 1.24,
91    /// but altered to be encoding/length generic and unbiased for non-trivial bases.
92    ///
93    /// [`rand.Text`]:
94    /// https://cs.opensource.google/go/go/+/refs/tags/go1.24.0:src/crypto/rand/text.go
95    ///
96    /// # Examples
97    ///
98    /// ```
99    /// use ya_rand::*;
100    /// use ya_rand_encoding::Base16Lowercase as B16L;
101    ///
102    /// const LEN: usize = 420;
103    /// let mut rng = new_rng_secure();
104    /// // Safe to unwrap because 420 is greater than 32, which
105    /// // is the minimum secure length of `Base16Lowercase`.
106    /// let hex_string = rng.text::<B16L>(LEN).unwrap();
107    /// assert!(hex_string.len() == LEN);
108    /// assert!(hex_string.bytes().count() == LEN);
109    /// assert!(hex_string.chars().count() == LEN);
110    /// for c in hex_string.bytes() {
111    ///     assert!(
112    ///         (b'0' <= c && c <= b'9') ||
113    ///         (b'a' <= c && c <= b'f')
114    ///     );
115    /// }
116    /// ```
117    #[cfg(feature = "alloc")]
118    fn text<E: Encoder>(&mut self, len: usize) -> Option<String> {
119        match len >= E::MIN_LEN {
120            true => Some({
121                const BYTE_VALUES: usize = 1 << u8::BITS;
122                // SAFETY: u8's are a trivial type and we pwomise to
123                // always overwrite all of them UwU.
124                let mut data = unsafe { Box::new_uninit_slice(len).assume_init().into_vec() };
125                // This branch is evaluated at compile time, so concrete
126                // implementations in final binaries will only have the
127                // contents of the branch suitable for the encoder used.
128                if BYTE_VALUES % E::CHARSET.len() == 0 {
129                    self.fill_bytes(&mut data);
130                    // Directly map each random u8 to a character in the set.
131                    // Since this branch is only reachable when length is a power
132                    // of two, the modulo gets optimized out and the whole thing
133                    // gets vectorized. The assembly for this branch is
134                    // absolutely beautiful.
135                    // This approach is extremely efficient, but only produces
136                    // unbiased random sequences when the length of the current
137                    // `CHARSET` is divisible by the amount of possible u8 values,
138                    // which is why we need a fallback approach.
139                    for d in &mut data {
140                        let random_index = *d as usize;
141                        *d = E::CHARSET[random_index % E::CHARSET.len()];
142                    }
143                } else {
144                    // Alternative approach that's potentially much slower,
145                    // but always produces unbiased results.
146                    // The unwrap gets optimized out since rustc can see that
147                    // `E::CHARSET` has a non-zero length.
148                    data.fill_with(|| *self.choose(E::CHARSET).unwrap());
149                }
150                // SAFETY: All provided encoders only use ascii values, and
151                // custom `Encoder` implementations agree to do the same when
152                // implementing the trait.
153                unsafe { String::from_utf8_unchecked(data) }
154            }),
155            false => None,
156        }
157    }
158}
159
160/// Trait for RNGs that can be created from a user-provided seed.
161pub trait SeedableYARandGenerator: YARandGenerator + Default {
162    /// Creates a generator from the output of an internal PRNG,
163    /// which is itself seeded from `seed`.
164    ///
165    /// As a rule: unless you are **absolutely certain** that you need to manually
166    /// seed a generator, you don't.
167    /// Instead, use [`crate::new_rng`] when you need to create a new instance.
168    ///
169    /// If you have a scenario where you really do need a set seed, prefer to use the
170    /// `Default` implementation of the desired generator.
171    ///
172    /// # Examples
173    ///
174    /// ```
175    /// use ya_rand::*;
176    ///
177    /// let mut rng1 = ShiroRng::new_with_seed(0);
178    /// // Default initialization is just a shortcut for explicitly seeding with 0.
179    /// let mut rng2 = ShiroRng::default();
180    /// assert!(rng1 == rng2);
181    ///
182    /// let result1 = rng1.u64();
183    /// let result2 = rng2.u64();
184    /// assert!(result1 == result2);
185    /// ```
186    fn new_with_seed(seed: u64) -> Self;
187}
188
189/// Base trait that all RNGs must implement.
190pub trait YARandGenerator: Sized {
191    /// Creates a generator using randomness provided by the OS.
192    ///
193    /// Unlike [`YARandGenerator::new`], which will panic on failure, `try_new`
194    /// propagates the error-handling responsibility to the user. But the probability
195    /// of your operating systems RNG failing is absurdly low, and in the rare case that it
196    /// does fail, that's not really an issue most users are going to be able to address.
197    ///
198    /// Stick to using [`crate::new_rng`], unless you really need a generator of a
199    /// different type (you probably don't), then use `new` on your desired type.
200    fn try_new() -> Result<Self, getrandom::Error>;
201
202    /// Returns a uniformly distributed `u64` in the interval [0, 2<sup>64</sup>).
203    fn u64(&mut self) -> u64;
204
205    /// Creates a generator using randomness provided by the OS.
206    ///
207    /// It is recommended to use the top-level [`crate::new_rng`] instead
208    /// of calling this function on a specific generator type.
209    ///
210    /// # Safety
211    ///
212    /// This function will panic if your OS rng fails to provide enough entropy.
213    /// But this is extremely unlikely, and unless you're working at the kernel level it's
214    /// not something you should ever be concerned with.
215    ///
216    /// Since Windows 10 this function is infallible, thanks to modern Windows versions adopting
217    /// a user-space cryptographic architecture that can't fail during runtime.
218    ///
219    /// # Examples
220    ///
221    /// ```
222    /// use ya_rand::*;
223    ///
224    /// // Recommended usage
225    /// let mut rng1 = new_rng();
226    /// // More explicit
227    /// let mut rng2 = ShiroRng::new();
228    /// // Even more explicit
229    /// let mut rng3 = Xoshiro256pp::new();
230    /// // Since these are all created using OS entropy, the odds of
231    /// // their initial states colliding is vanishingly small.
232    /// assert!(rng1 != rng2);
233    /// assert!(rng1 != rng3);
234    /// assert!(rng2 != rng3);
235    /// ```
236    #[inline]
237    fn new() -> Self {
238        Self::try_new().expect(
239            "WARNING: retrieving random data from the operating system should never fail; \
240            something has gone terribly wrong",
241        )
242    }
243
244    /// Returns a uniformly distributed `u32` in the interval [0, 2<sup>32</sup>).
245    #[inline]
246    fn u32(&mut self) -> u32 {
247        self.bits(u32::BITS) as u32
248    }
249
250    /// Returns a uniformly distributed `u16` in the interval [0, 2<sup>16</sup>).
251    #[inline]
252    fn u16(&mut self) -> u16 {
253        self.bits(u16::BITS) as u16
254    }
255
256    /// Returns a uniformly distributed `u8` in the interval [0, 2<sup>8</sup>).
257    #[inline]
258    fn u8(&mut self) -> u8 {
259        self.bits(u8::BITS) as u8
260    }
261
262    /// Returns a uniformly distributed `u64` in the interval [0, 2<sup>`bit_count`</sup>).
263    ///
264    /// The value of `bit_count` is clamped to 64.
265    #[inline]
266    fn bits(&mut self, bit_count: u32) -> u64 {
267        self.u64() >> (u64::BITS - bit_count.min(u64::BITS))
268    }
269
270    /// A simple coinflip, returning a `bool` that has a 50% chance of being true.
271    ///
272    /// # Examples
273    ///
274    /// ```
275    /// use ya_rand::*;
276    ///
277    /// const ITERATIONS: u64 = 1 << 24;
278    /// let mut rng = new_rng();
279    /// let mut yes: u64 = 0;
280    /// let mut no: u64 = 0;
281    /// for _ in 0..ITERATIONS {
282    ///     if rng.bool() {
283    ///         yes += 1;
284    ///     } else {
285    ///         no += 1;
286    ///     }
287    /// }
288    /// // We expect the difference to be within ~5%.
289    /// let THRESHOLD: u64 = ITERATIONS / 20;
290    /// assert!(yes.abs_diff(no) <= THRESHOLD);
291    /// ```
292    #[inline]
293    fn bool(&mut self) -> bool {
294        // Compiles to a single "shr 63" instruction.
295        self.bits(1) == 1
296    }
297
298    /// Returns a uniformly distributed `u64` in the interval [0, `max`).
299    ///
300    /// Using [`YARandGenerator::bits`] when `max` happens to be a power of 2
301    /// will be significantly faster.
302    ///
303    /// # Examples
304    ///
305    /// ```
306    /// use ya_rand::*;
307    ///
308    /// let mut rng = new_rng();
309    /// // Special case: bound of 0 always returns 0.
310    /// assert!(rng.bound(0) == 0);
311    /// for i in 1..=2000 {
312    ///     for _ in 0..(i * 2) {
313    ///         assert!(rng.bound(i) < i);
314    ///     }
315    /// }
316    /// ```
317    #[inline]
318    fn bound(&mut self, max: u64) -> u64 {
319        // Lemire's nearly divisionless method: https://arxiv.org/abs/1805.10941.
320        let (mut high, mut low) = wide_mul(self.u64(), max);
321        match low < max {
322            false => {}
323            true => {
324                // The dreaded division.
325                let threshold = max.wrapping_neg() % max;
326                while low < threshold {
327                    (high, low) = wide_mul(self.u64(), max);
328                }
329            }
330        }
331        debug_assert!(
332            (max != 0 && high < max) || high == 0,
333            "BUG: assertion should be unreachable"
334        );
335        high
336    }
337
338    /// Returns a uniformly distributed `u64` in the interval \[0, `max`\].
339    ///
340    /// It is expected that `max` < `u64::MAX`.
341    ///
342    /// # Examples
343    ///
344    /// ```
345    /// use ya_rand::*;
346    ///
347    /// let mut rng = new_rng();
348    /// for i in 0..=2000 {
349    ///     for _ in 0..(i * 2) {
350    ///         assert!(rng.bound_inclusive(i) <= i);
351    ///     }
352    /// }
353    /// ```
354    #[inline]
355    fn bound_inclusive(&mut self, max: u64) -> u64 {
356        self.bound(max + 1)
357    }
358
359    /// Returns a uniformly distributed `i64` in the interval [`min`, `max`)
360    ///
361    /// It is expected that `min` < `max`.
362    #[inline]
363    fn range(&mut self, min: i64, max: i64) -> i64 {
364        let delta = max.abs_diff(min);
365        (self.bound(delta) as i64) + min
366    }
367
368    /// Returns a uniformly distributed `i64` in the interval \[`min`, `max`\]
369    ///
370    /// It is expected that `min` <= `max` and `max` < `u64::MAX`.
371    #[inline]
372    fn range_inclusive(&mut self, min: i64, max: i64) -> i64 {
373        self.range(min, max + 1)
374    }
375
376    /// Returns a uniformly distributed `f64` in the interval [0.0, 1.0).
377    #[inline]
378    fn f64(&mut self) -> f64 {
379        self.bits(F64_MANT) as f64 / F64_DIVISOR
380    }
381
382    /// Returns a uniformly distributed `f32` in the interval [0.0, 1.0).
383    #[inline]
384    fn f32(&mut self) -> f32 {
385        self.bits(F32_MANT) as f32 / F32_DIVISOR
386    }
387
388    /// Returns a uniformly distributed `f64` in the interval (0.0, 1.0].
389    #[inline]
390    fn f64_nonzero(&mut self) -> f64 {
391        // Interval of (0, 2^53]
392        let nonzero = self.bits(F64_MANT) + 1;
393        nonzero as f64 / F64_DIVISOR
394    }
395
396    /// Returns a uniformly distributed `f32` in the interval (0.0, 1.0].
397    #[inline]
398    fn f32_nonzero(&mut self) -> f32 {
399        // Interval of (0, 2^24]
400        let nonzero = self.bits(F32_MANT) + 1;
401        nonzero as f32 / F32_DIVISOR
402    }
403
404    /// Returns a uniformly distributed `f64` in the interval (-1.0, 1.0).
405    #[inline]
406    fn f64_wide(&mut self) -> f64 {
407        // This approach is faster than using Generator::range.
408        const BITS: u32 = F64_MANT + 1;
409        const OFFSET: i64 = F64_MAX_PRECISE as i64;
410        let mut x: i64;
411        loop {
412            // Start with an interval of [0, 2^54)
413            x = self.bits(BITS) as i64;
414            // Interval is now (0, 2^54)
415            match x != 0 {
416                true => break,
417                false => {}
418            }
419        }
420        // Shift interval to (-2^53, 2^53)
421        x -= OFFSET;
422        x as f64 / F64_DIVISOR
423    }
424
425    /// Returns a uniformly distributed `f32` in the interval (-1.0, 1.0).
426    #[inline]
427    fn f32_wide(&mut self) -> f32 {
428        // This approach is faster than using Generator::range.
429        const BITS: u32 = F32_MANT + 1;
430        const OFFSET: i64 = F32_MAX_PRECISE as i64;
431        let mut x: i64;
432        loop {
433            // Start with an interval of [0, 2^25)
434            x = self.bits(BITS) as i64;
435            // Interval is now (0, 2^25)
436            match x != 0 {
437                true => break,
438                false => {}
439            }
440        }
441        // Shift interval to (-2^24, 2^24)
442        x -= OFFSET;
443        x as f32 / F32_DIVISOR
444    }
445
446    /// Returns two indepedent and normally distributed `f64` values with
447    /// a `mean` of `0.0` and a `stddev` of `1.0`.
448    #[cfg(feature = "std")]
449    fn f64_normal(&mut self) -> (f64, f64) {
450        // Marsaglia polar method.
451        // TLDR: It projects a point within the unit
452        // circle onto the unit radius.
453        let mut x: f64;
454        let mut y: f64;
455        let mut s: f64;
456        loop {
457            x = self.f64_wide();
458            y = self.f64_wide();
459            s = (x * x) + (y * y);
460            // Reroll if `s` does not lie **within** the unit circle.
461            match s < 1.0 && s != 0.0 {
462                true => break,
463                false => {}
464            }
465        }
466        let t = (2.0 * s.ln().abs() / s).sqrt();
467        (x * t, y * t)
468    }
469
470    /// Returns two indepedent and normally distributed `f64` values with
471    /// user-defined `mean` and `stddev`.
472    ///
473    /// It is expected that `stddev.abs()` != `0.0`.
474    #[cfg(feature = "std")]
475    #[inline]
476    fn f64_normal_distribution(&mut self, mean: f64, stddev: f64) -> (f64, f64) {
477        let (x, y) = self.f64_normal();
478        (x.mul_add(stddev, mean), y.mul_add(stddev, mean))
479    }
480
481    /// Returns an exponentially distributed `f64` with `lambda` of `1.0`.
482    #[cfg(feature = "std")]
483    #[inline]
484    fn f64_exponential(&mut self) -> f64 {
485        // Using abs() instead of negating the result of ln() to
486        // eliminate the possibility of ever returning -0.0.
487        self.f64_nonzero().ln().abs()
488    }
489
490    /// Returns an exponentially distributed `f64` with user-defined `lambda`.
491    ///
492    /// It is expected that `lambda.abs()` != `0.0`.
493    #[cfg(feature = "std")]
494    #[inline]
495    fn f64_exponential_lambda(&mut self, lambda: f64) -> f64 {
496        self.f64_exponential() / lambda
497    }
498
499    /// Returns a randomly chosen item from the iterator of `collection`.
500    ///
501    /// Returns `None` when the length of the iterator is zero.
502    ///
503    /// # Examples
504    ///
505    /// ```
506    /// use ya_rand::*;
507    ///
508    /// const SIZE: usize = 1738;
509    /// let mut rng = new_rng();
510    /// let mut v = [0; SIZE];
511    /// for i in 0..SIZE {
512    ///     v[i] = i;
513    /// }
514    /// let (top, bottom) = v.split_at(v.len() / 2);
515    ///
516    /// // Sanity check.
517    /// let random_item = rng.choose(&v).expect("vector `v` is not empty");
518    /// assert!(v.contains(random_item));
519    ///
520    /// // Choose `random_item` from the top half of the array.
521    /// let random_item = rng.choose(top).expect("still not empty");
522    /// assert!(top.contains(random_item) == true);
523    ///
524    /// // We're looking in the bottom half so we won't find the
525    /// // `random_item` from the top half.
526    /// assert!(bottom.contains(random_item) == false);
527    /// ```
528    #[inline]
529    fn choose<C>(&mut self, collection: C) -> Option<C::Item>
530    where
531        C: IntoIterator,
532        C::IntoIter: ExactSizeIterator,
533    {
534        let mut iter = collection.into_iter();
535        let len = iter.len();
536        match len != 0 {
537            true => Some({
538                let idx = self.bound(len as u64) as usize;
539                // SAFETY: Since `bound` always returns a value less than
540                // it's input, `nth` will never return `None`.
541                unsafe { iter.nth(idx).unwrap_unchecked() }
542            }),
543            false => None,
544        }
545    }
546
547    /// Returns a randomly selected ASCII character from the pool of:
548    ///
549    /// `'A'..='Z'`, and`'a'..='z'`
550    #[inline]
551    fn ascii_alphabetic(&mut self) -> char {
552        *self.choose(&ALPHANUMERIC[..52]).unwrap() as char
553    }
554
555    /// Returns a randomly selected ASCII character from the pool of:
556    ///
557    /// `'A'..='Z'`
558    #[inline]
559    fn ascii_uppercase(&mut self) -> char {
560        *self.choose(&ALPHANUMERIC[..26]).unwrap() as char
561    }
562
563    /// Returns a randomly selected ASCII character from the pool of:
564    ///
565    /// `'a'..='z'`
566    #[inline]
567    fn ascii_lowercase(&mut self) -> char {
568        *self.choose(&ALPHANUMERIC[26..52]).unwrap() as char
569    }
570
571    /// Returns a randomly selected ASCII character from the pool of:
572    ///
573    /// `'A'..='Z'`, `'a'..='z'`, and `'0'..='9'`
574    #[inline]
575    fn ascii_alphanumeric(&mut self) -> char {
576        *self.choose(&ALPHANUMERIC[..]).unwrap() as char
577    }
578
579    /// Returns a randomly selected ASCII character from the pool of:
580    ///
581    /// `'0'..='9'`
582    #[inline]
583    fn ascii_digit(&mut self) -> char {
584        *self.choose(&ALPHANUMERIC[52..]).unwrap() as char
585    }
586
587    /// Performs a Fisher-Yates shuffle on the contents of `slice`.
588    ///
589    /// This implementation is the modern variant introduced by
590    /// Richard Durstenfeld. It is in-place and O(n).
591    ///
592    /// # Examples
593    ///
594    /// ```
595    /// use ya_rand::*;
596    ///
597    /// let mut rng = new_rng();
598    /// let mut data = [0; 1738];
599    /// for i in 0..data.len() {
600    ///     data[i] = i;
601    /// }
602    /// assert!(data.is_sorted() == true);
603    ///
604    /// rng.shuffle(&mut data);
605    /// assert!(data.is_sorted() == false);
606    /// ```
607    fn shuffle<T>(&mut self, slice: &mut [T]) {
608        let slice_ptr = slice.as_mut_ptr();
609        for i in (1..slice.len()).rev() {
610            let j = self.bound_inclusive(i as u64) as usize;
611            // SAFETY: Index 'i' will always be in bounds because it's
612            // bounded by slice length; index 'j' will always be
613            // in bounds because it's bounded by 'i'.
614            unsafe {
615                swap(slice_ptr.add(i), slice_ptr.add(j));
616            }
617        }
618    }
619
620    /// Clones `slice` into a new `Vec`, performs a Fisher-Yates
621    /// shuffle on it's contents, and returns the result.
622    ///
623    /// See [`YARandGenerator::shuffle`] for details/examples.
624    #[cfg(feature = "alloc")]
625    #[inline]
626    fn shuffle_cloned<T: Clone>(&mut self, slice: &[T]) -> Vec<T> {
627        let mut v = slice.to_vec();
628        self.shuffle(&mut v);
629        v
630    }
631}