ya_rand/rng.rs
1use crate::util::{as_raw_bytes_mut, wide_mul};
2use core::ptr::swap;
3
4#[cfg(feature = "alloc")]
5use {
6 crate::ya_rand_encoding::Encoder,
7 alloc::{string::String, vec, vec::Vec},
8};
9
10const F64_MANT: u32 = f64::MANTISSA_DIGITS;
11const F32_MANT: u32 = f32::MANTISSA_DIGITS;
12const F64_MAX_PRECISE: u64 = 1 << F64_MANT;
13const F32_MAX_PRECISE: u64 = 1 << F32_MANT;
14const F64_DIVISOR: f64 = F64_MAX_PRECISE as f64;
15const F32_DIVISOR: f32 = F32_MAX_PRECISE as f32;
16pub const ALPHANUMERIC: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
17
18/// Trait for RNGs that provide cryptographically secure data.
19pub trait SecureYARandGenerator: YARandGenerator {
20 /// Fills `dst` with random data, which is safe to be used in cryptographic contexts.
21 ///
22 /// # Examples
23 ///
24 /// ```
25 /// use ya_rand::*;
26 ///
27 /// let mut rng = new_rng_secure();
28 /// let mut data = [0; 1738];
29 /// rng.fill_bytes(&mut data);
30 /// assert!(data.into_iter().any(|v| v != 0));
31 /// ```
32 fn fill_bytes(&mut self, dst: &mut [u8]);
33
34 /// Fills `dst` with random data, which is safe to be used in cryptographic contexts.
35 ///
36 /// Differs from [`SecureYARandGenerator::fill_bytes`] in that the underlying type of `dst`
37 /// doesn't need to be any specific type.
38 ///
39 /// # Examples
40 ///
41 /// ```
42 /// use ya_rand::*;
43 ///
44 /// #[repr(C)]
45 /// #[derive(Clone, Copy, Default, PartialEq, Eq)]
46 /// struct NotAByte {
47 /// x: u16,
48 /// y: u32,
49 /// z: u64,
50 /// }
51 ///
52 /// let mut rng = new_rng_secure();
53 /// let zero_value = NotAByte::default();
54 /// let mut data = [zero_value; 69];
55 /// unsafe {
56 /// rng.fill_raw(&mut data);
57 /// }
58 /// assert!(data.into_iter().any(|v| v != zero_value));
59 /// ```
60 ///
61 /// # Safety
62 ///
63 /// `T` must be valid as nothing more than a collection of bytes.
64 /// Integer types are the simplest example of this, but structs of integer types
65 /// generally should fall under the same umbrella.
66 #[inline]
67 unsafe fn fill_raw<T>(&mut self, dst: &mut [T]) {
68 // SAFETY: The caller has promised not to be a fucking dumbass.
69 let dst_as_bytes = unsafe { as_raw_bytes_mut(dst) };
70 self.fill_bytes(dst_as_bytes);
71 }
72
73 /// Generates a random `String` with length `len`, using the provided
74 /// `Encoder` to determine character set and minimum secure length. Since
75 /// character sets can only contain valid ascii values, the length of the created
76 /// `String` reprensents both the size of the `String` in bytes, and the
77 /// amount of characters it contains.
78 ///
79 /// Returns `None` when `len` is less than what the encoder declares to be secure.
80 ///
81 /// All provided encoders are accessible through [`crate::ya_rand_encoding`].
82 /// Users wishing to implement their own encoding must do so through the
83 /// [`Encoder`](crate::ya_rand_encoding::Encoder) trait.
84 ///
85 /// Originally inspired by golang's addition of [`rand.Text`] in release 1.24,
86 /// but altered to be encoding/length generic and unbiased for non-trivial bases.
87 ///
88 /// [`rand.Text`]:
89 /// https://cs.opensource.google/go/go/+/refs/tags/go1.24.0:src/crypto/rand/text.go
90 ///
91 /// # Examples
92 ///
93 /// ```
94 /// use ya_rand::*;
95 /// use ya_rand_encoding::Base16Lowercase as B16L;
96 ///
97 /// const LEN: usize = 420;
98 /// let mut rng = new_rng_secure();
99 /// // Safe to unwrap because 420 is greater than 32, which
100 /// // is the minimum secure length of `Base16Lowercase`.
101 /// let hex_string = rng.text::<B16L>(LEN).unwrap();
102 /// assert!(hex_string.len() == LEN);
103 /// assert!(hex_string.bytes().count() == LEN);
104 /// assert!(hex_string.chars().count() == LEN);
105 /// for c in hex_string.bytes() {
106 /// assert!(
107 /// (b'0' <= c && c <= b'9') ||
108 /// (b'a' <= c && c <= b'f')
109 /// );
110 /// }
111 /// ```
112 #[cfg(feature = "alloc")]
113 fn text<E: Encoder>(&mut self, len: usize) -> Option<String> {
114 match len >= E::MIN_LEN {
115 true => Some({
116 const BYTE_VALUES: usize = 1 << u8::BITS;
117 // TODO: Test if forcing page faulting with non-zero
118 // initializer improves perfermance.
119 let mut data = vec![0; len];
120 // This branch is evaluated at compile time, so concrete
121 // implementations in final binaries will only have the
122 // contents of the branch suitable for the encoder used.
123 if BYTE_VALUES % E::CHARSET.len() == 0 {
124 self.fill_bytes(&mut data);
125 // Directly map each random u8 to a character in the set.
126 // Since this branch is only reachable when length is a power
127 // of two, the modulo gets optimized out and the whole thing
128 // gets vectorized. The assembly for this branch is
129 // absolutely beautiful.
130 // This approach is extremely efficient, but only produces
131 // unbiased random sequences when the length of the current
132 // `CHARSET` is divisible by the amount of possible u8 values,
133 // which is why we need a fallback approach.
134 for d in &mut data {
135 let random_index = *d as usize;
136 *d = E::CHARSET[random_index % E::CHARSET.len()];
137 }
138 } else {
139 // Alternative approach that's potentially much slower,
140 // but always produces unbiased results.
141 // The unwrap gets optimized out since rustc can see that
142 // `E::CHARSET` has a non-zero length.
143 data.fill_with(|| *self.choose(E::CHARSET).unwrap());
144 }
145 // SAFETY: All provided encoders only use ascii values, and
146 // custom `Encoder` implementations agree to do the same when
147 // implementing the trait.
148 unsafe { String::from_utf8_unchecked(data) }
149 }),
150 false => None,
151 }
152 }
153}
154
155/// Trait for RNGs that can be created from a user-provided seed.
156pub trait SeedableYARandGenerator: YARandGenerator + Default {
157 /// Creates a generator from the output of an internal PRNG,
158 /// which is itself seeded from `seed`.
159 ///
160 /// As a rule: unless you are **absolutely certain** that you need to manually
161 /// seed a generator, you don't.
162 /// Instead, use [`crate::new_rng`] when you need to create a new instance.
163 ///
164 /// If you have a scenario where you really do need a set seed, prefer to use the
165 /// `Default` implementation of the desired generator.
166 ///
167 /// # Examples
168 ///
169 /// ```
170 /// use ya_rand::*;
171 ///
172 /// let mut rng1 = ShiroRng::new_with_seed(0);
173 /// // Default initialization is just a shortcut for explicitly seeding with 0.
174 /// let mut rng2 = ShiroRng::default();
175 /// assert!(rng1 == rng2);
176 ///
177 /// let result1 = rng1.u64();
178 /// let result2 = rng2.u64();
179 /// assert!(result1 == result2);
180 /// ```
181 fn new_with_seed(seed: u64) -> Self;
182}
183
184/// Base trait that all RNGs must implement.
185pub trait YARandGenerator: Sized {
186 /// Creates a generator using randomness provided by the OS.
187 ///
188 /// Unlike [`YARandGenerator::new`], which will panic on failure, `try_new`
189 /// propagates the error-handling responsibility to the user. But the probability
190 /// of your operating systems RNG failing is absurdly low, and in the rare case that it
191 /// does fail, that's not really an issue most users are going to be able to address.
192 ///
193 /// Stick to using [`crate::new_rng`], unless you really need a generator of a
194 /// different type (you probably don't), then use `new` on your desired type.
195 fn try_new() -> Result<Self, getrandom::Error>;
196
197 /// Returns a uniformly distributed `u64` in the interval [0, 2<sup>64</sup>).
198 fn u64(&mut self) -> u64;
199
200 /// Creates a generator using randomness provided by the OS.
201 ///
202 /// It is recommended to use the top-level [`crate::new_rng`] instead
203 /// of calling this function on a specific generator type.
204 ///
205 /// # Safety
206 ///
207 /// This function will panic if your OS rng fails to provide enough entropy.
208 /// But this is extremely unlikely, and unless you're working at the kernel level it's
209 /// not something you should ever be concerned with.
210 ///
211 /// Since Windows 10 this function is infallible, thanks to modern Windows versions adopting
212 /// a user-space cryptographic architecture that can't fail during runtime.
213 ///
214 /// # Examples
215 ///
216 /// ```
217 /// use ya_rand::*;
218 ///
219 /// // Recommended usage
220 /// let mut rng1 = new_rng();
221 /// // More explicit
222 /// let mut rng2 = ShiroRng::new();
223 /// // Even more explicit
224 /// let mut rng3 = Xoshiro256pp::new();
225 /// // Since these are all created using OS entropy, the odds of
226 /// // their initial states colliding is vanishingly small.
227 /// assert!(rng1 != rng2);
228 /// assert!(rng1 != rng3);
229 /// assert!(rng2 != rng3);
230 /// ```
231 #[inline]
232 fn new() -> Self {
233 Self::try_new().expect("retrieving random data from the operating system should never fail")
234 }
235
236 /// Returns a uniformly distributed `usize` in the interval \[0, `usize::MAX`\].
237 #[inline]
238 fn usize(&mut self) -> usize {
239 self.bits(usize::BITS) as usize
240 }
241
242 /// Returns a uniformly distributed `u32` in the interval [0, 2<sup>32</sup>).
243 #[inline]
244 fn u32(&mut self) -> u32 {
245 self.bits(u32::BITS) as u32
246 }
247
248 /// Returns a uniformly distributed `u16` in the interval [0, 2<sup>16</sup>).
249 #[inline]
250 fn u16(&mut self) -> u16 {
251 self.bits(u16::BITS) as u16
252 }
253
254 /// Returns a uniformly distributed `u8` in the interval [0, 2<sup>8</sup>).
255 #[inline]
256 fn u8(&mut self) -> u8 {
257 self.bits(u8::BITS) as u8
258 }
259
260 /// Returns a uniformly distributed `u64` in the interval [0, 2<sup>`bit_count`</sup>).
261 ///
262 /// The value of `bit_count` is clamped to 64.
263 #[inline]
264 fn bits(&mut self, bit_count: u32) -> u64 {
265 self.u64() >> (u64::BITS - bit_count.min(u64::BITS))
266 }
267
268 /// A simple coinflip, returning a `bool` that has a 50% chance of being true.
269 ///
270 /// # Examples
271 ///
272 /// ```
273 /// use ya_rand::*;
274 ///
275 /// const ITERATIONS: u64 = 1 << 24;
276 /// let mut rng = new_rng();
277 /// let mut yes: u64 = 0;
278 /// let mut no: u64 = 0;
279 /// for _ in 0..ITERATIONS {
280 /// if rng.bool() {
281 /// yes += 1;
282 /// } else {
283 /// no += 1;
284 /// }
285 /// }
286 /// // We expect the difference to be within ~5%.
287 /// let THRESHOLD: u64 = ITERATIONS / 20;
288 /// assert!(yes.abs_diff(no) <= THRESHOLD);
289 /// ```
290 #[inline]
291 fn bool(&mut self) -> bool {
292 // Compiles to a single "shr 63" instruction.
293 self.bits(1) == 1
294 }
295
296 /// Returns a uniformly distributed `u64` in the interval [0, `max`).
297 ///
298 /// Using [`YARandGenerator::bits`] when `max` happens to be a power of 2
299 /// will be significantly faster.
300 ///
301 /// # Examples
302 ///
303 /// ```
304 /// use ya_rand::*;
305 ///
306 /// let mut rng = new_rng();
307 /// // Special case: bound of 0 always returns 0.
308 /// assert!(rng.bound(0) == 0);
309 /// for i in 1..=2000 {
310 /// for _ in 0..(i * 2) {
311 /// assert!(rng.bound(i) < i);
312 /// }
313 /// }
314 /// ```
315 #[inline]
316 fn bound(&mut self, max: u64) -> u64 {
317 // Lemire's nearly divisionless method: https://arxiv.org/abs/1805.10941.
318 let (mut high, mut low) = wide_mul(self.u64(), max);
319 match low < max {
320 false => {}
321 true => {
322 // The dreaded division.
323 let threshold = max.wrapping_neg() % max;
324 while low < threshold {
325 (high, low) = wide_mul(self.u64(), max);
326 }
327 }
328 }
329 debug_assert!(
330 (max != 0 && high < max) || high == 0,
331 "BUG: assertion should be unreachable"
332 );
333 high
334 }
335
336 /// Returns a uniformly distributed `u64` in the interval \[0, `max`\].
337 ///
338 /// It is expected that `max` < `u64::MAX`.
339 ///
340 /// # Examples
341 ///
342 /// ```
343 /// use ya_rand::*;
344 ///
345 /// let mut rng = new_rng();
346 /// for i in 0..=2000 {
347 /// for _ in 0..(i * 2) {
348 /// assert!(rng.bound_inclusive(i) <= i);
349 /// }
350 /// }
351 /// ```
352 #[inline]
353 fn bound_inclusive(&mut self, max: u64) -> u64 {
354 self.bound(max + 1)
355 }
356
357 /// Returns a uniformly distributed `i64` in the interval [`min`, `max`)
358 ///
359 /// It is expected that `min` < `max`.
360 #[inline]
361 fn range(&mut self, min: i64, max: i64) -> i64 {
362 let delta = max.abs_diff(min);
363 (self.bound(delta) as i64) + min
364 }
365
366 /// Returns a uniformly distributed `i64` in the interval \[`min`, `max`\]
367 ///
368 /// It is expected that `min` <= `max` and `max` < `u64::MAX`.
369 #[inline]
370 fn range_inclusive(&mut self, min: i64, max: i64) -> i64 {
371 self.range(min, max + 1)
372 }
373
374 /// Returns a uniformly distributed `f64` in the interval [0.0, 1.0).
375 #[inline]
376 fn f64(&mut self) -> f64 {
377 self.bits(F64_MANT) as f64 / F64_DIVISOR
378 }
379
380 /// Returns a uniformly distributed `f32` in the interval [0.0, 1.0).
381 #[inline]
382 fn f32(&mut self) -> f32 {
383 self.bits(F32_MANT) as f32 / F32_DIVISOR
384 }
385
386 /// Returns a uniformly distributed `f64` in the interval (0.0, 1.0].
387 #[inline]
388 fn f64_nonzero(&mut self) -> f64 {
389 // Interval of (0, 2^53]
390 let nonzero = self.bits(F64_MANT) + 1;
391 nonzero as f64 / F64_DIVISOR
392 }
393
394 /// Returns a uniformly distributed `f32` in the interval (0.0, 1.0].
395 #[inline]
396 fn f32_nonzero(&mut self) -> f32 {
397 // Interval of (0, 2^24]
398 let nonzero = self.bits(F32_MANT) + 1;
399 nonzero as f32 / F32_DIVISOR
400 }
401
402 /// Returns a uniformly distributed `f64` in the interval (-1.0, 1.0).
403 #[inline]
404 fn f64_wide(&mut self) -> f64 {
405 // This approach is faster than using Generator::range.
406 const BITS: u32 = F64_MANT + 1;
407 const OFFSET: i64 = F64_MAX_PRECISE as i64;
408 let mut x: i64;
409 loop {
410 // Start with an interval of [0, 2^54)
411 x = self.bits(BITS) as i64;
412 // Interval is now (0, 2^54)
413 match x != 0 {
414 true => break,
415 false => {}
416 }
417 }
418 // Shift interval to (-2^53, 2^53)
419 x -= OFFSET;
420 x as f64 / F64_DIVISOR
421 }
422
423 /// Returns a uniformly distributed `f32` in the interval (-1.0, 1.0).
424 #[inline]
425 fn f32_wide(&mut self) -> f32 {
426 // This approach is faster than using Generator::range.
427 const BITS: u32 = F32_MANT + 1;
428 const OFFSET: i64 = F32_MAX_PRECISE as i64;
429 let mut x: i64;
430 loop {
431 // Start with an interval of [0, 2^25)
432 x = self.bits(BITS) as i64;
433 // Interval is now (0, 2^25)
434 match x != 0 {
435 true => break,
436 false => {}
437 }
438 }
439 // Shift interval to (-2^24, 2^24)
440 x -= OFFSET;
441 x as f32 / F32_DIVISOR
442 }
443
444 /// Returns two indepedent and normally distributed `f64` values with
445 /// a `mean` of `0.0` and a `stddev` of `1.0`.
446 #[cfg(feature = "std")]
447 fn f64_normal(&mut self) -> (f64, f64) {
448 // Marsaglia polar method.
449 // TLDR: It projects a point within the unit
450 // circle onto the unit radius.
451 let mut x: f64;
452 let mut y: f64;
453 let mut s: f64;
454 loop {
455 x = self.f64_wide();
456 y = self.f64_wide();
457 s = (x * x) + (y * y);
458 // Reroll if `s` does not lie **within** the unit circle.
459 match s < 1.0 && s != 0.0 {
460 true => break,
461 false => {}
462 }
463 }
464 let t = (2.0 * s.ln().abs() / s).sqrt();
465 (x * t, y * t)
466 }
467
468 /// Returns two indepedent and normally distributed `f64` values with
469 /// user-defined `mean` and `stddev`.
470 ///
471 /// It is expected that `stddev.abs()` != `0.0`.
472 #[cfg(feature = "std")]
473 #[inline]
474 fn f64_normal_distribution(&mut self, mean: f64, stddev: f64) -> (f64, f64) {
475 let (x, y) = self.f64_normal();
476 (x.mul_add(stddev, mean), y.mul_add(stddev, mean))
477 }
478
479 /// Returns an exponentially distributed `f64` with `lambda` of `1.0`.
480 #[cfg(feature = "std")]
481 #[inline]
482 fn f64_exponential(&mut self) -> f64 {
483 // Using abs() instead of negating the result of ln() to
484 // eliminate the possibility of ever returning -0.0.
485 self.f64_nonzero().ln().abs()
486 }
487
488 /// Returns an exponentially distributed `f64` with user-defined `lambda`.
489 ///
490 /// It is expected that `lambda.abs()` != `0.0`.
491 #[cfg(feature = "std")]
492 #[inline]
493 fn f64_exponential_lambda(&mut self, lambda: f64) -> f64 {
494 self.f64_exponential() / lambda
495 }
496
497 /// Returns a randomly chosen item from the iterator of `collection`.
498 ///
499 /// Returns `None` when the length of the iterator is zero.
500 ///
501 /// # Examples
502 ///
503 /// ```
504 /// use ya_rand::*;
505 ///
506 /// const SIZE: usize = 1738;
507 /// let mut rng = new_rng();
508 /// let mut v = [0; SIZE];
509 /// for i in 0..SIZE {
510 /// v[i] = i;
511 /// }
512 /// let (top, bottom) = v.split_at(v.len() / 2);
513 ///
514 /// // Sanity check.
515 /// let random_item = rng.choose(&v).expect("vector `v` is not empty");
516 /// assert!(v.contains(random_item));
517 ///
518 /// // Choose `random_item` from the top half of the array.
519 /// let random_item = rng.choose(top).expect("still not empty");
520 /// assert!(top.contains(random_item) == true);
521 ///
522 /// // We're looking in the bottom half so we won't find the
523 /// // `random_item` from the top half.
524 /// assert!(bottom.contains(random_item) == false);
525 /// ```
526 #[inline]
527 fn choose<C>(&mut self, collection: C) -> Option<C::Item>
528 where
529 C: IntoIterator,
530 C::IntoIter: ExactSizeIterator,
531 {
532 let mut iter = collection.into_iter();
533 let len = iter.len();
534 match len != 0 {
535 true => Some({
536 let idx = self.bound(len as u64) as usize;
537 // SAFETY: Since `bound` always returns a value less than
538 // it's input, `nth` will never return `None`.
539 unsafe { iter.nth(idx).unwrap_unchecked() }
540 }),
541 false => None,
542 }
543 }
544
545 /// Returns a randomly selected ASCII character from the pool of:
546 ///
547 /// `'A'..='Z'`, and`'a'..='z'`
548 #[inline]
549 fn ascii_alphabetic(&mut self) -> char {
550 *self.choose(&ALPHANUMERIC[..52]).unwrap() as char
551 }
552
553 /// Returns a randomly selected ASCII character from the pool of:
554 ///
555 /// `'A'..='Z'`
556 #[inline]
557 fn ascii_uppercase(&mut self) -> char {
558 *self.choose(&ALPHANUMERIC[..26]).unwrap() as char
559 }
560
561 /// Returns a randomly selected ASCII character from the pool of:
562 ///
563 /// `'a'..='z'`
564 #[inline]
565 fn ascii_lowercase(&mut self) -> char {
566 *self.choose(&ALPHANUMERIC[26..52]).unwrap() as char
567 }
568
569 /// Returns a randomly selected ASCII character from the pool of:
570 ///
571 /// `'A'..='Z'`, `'a'..='z'`, and `'0'..='9'`
572 #[inline]
573 fn ascii_alphanumeric(&mut self) -> char {
574 *self.choose(&ALPHANUMERIC[..]).unwrap() as char
575 }
576
577 /// Returns a randomly selected ASCII character from the pool of:
578 ///
579 /// `'0'..='9'`
580 #[inline]
581 fn ascii_digit(&mut self) -> char {
582 *self.choose(&ALPHANUMERIC[52..]).unwrap() as char
583 }
584
585 /// Performs a Fisher-Yates shuffle on the contents of `slice`.
586 ///
587 /// This implementation is the modern variant introduced by
588 /// Richard Durstenfeld. It is in-place and O(n).
589 ///
590 /// # Examples
591 ///
592 /// ```
593 /// use ya_rand::*;
594 ///
595 /// let mut rng = new_rng();
596 /// let mut data = [0; 1738];
597 /// for i in 0..data.len() {
598 /// data[i] = i;
599 /// }
600 /// assert!(data.is_sorted() == true);
601 ///
602 /// rng.shuffle(&mut data);
603 /// assert!(data.is_sorted() == false);
604 /// ```
605 fn shuffle<T>(&mut self, slice: &mut [T]) {
606 let slice_ptr = slice.as_mut_ptr();
607 for i in (1..slice.len()).rev() {
608 let j = self.bound_inclusive(i as u64) as usize;
609 // SAFETY: Index 'i' will always be in bounds because it's
610 // bounded by slice length; index 'j' will always be
611 // in bounds because it's bounded by 'i'.
612 unsafe {
613 swap(slice_ptr.add(i), slice_ptr.add(j));
614 }
615 }
616 }
617
618 /// Clones `slice` into a new `Vec`, performs a Fisher-Yates
619 /// shuffle on it's contents, and returns the result.
620 ///
621 /// See [`YARandGenerator::shuffle`] for details/examples.
622 #[cfg(feature = "alloc")]
623 #[inline]
624 fn shuffle_cloned<T: Clone>(&mut self, slice: &[T]) -> Vec<T> {
625 let mut v = slice.to_vec();
626 self.shuffle(&mut v);
627 v
628 }
629}