ya_rand/rng.rs
1use crate::util::wide_mul;
2use core::ptr::swap;
3use core::slice::from_raw_parts_mut;
4
5#[cfg(feature = "alloc")]
6use {
7 crate::ya_rand_encoding::Encoder,
8 alloc::{boxed::Box, string::String, vec::Vec},
9};
10
11const F64_MANT: u32 = f64::MANTISSA_DIGITS;
12const F32_MANT: u32 = f32::MANTISSA_DIGITS;
13const F64_MAX_PRECISE: u64 = 1 << F64_MANT;
14const F32_MAX_PRECISE: u64 = 1 << F32_MANT;
15const F64_DIVISOR: f64 = F64_MAX_PRECISE as f64;
16const F32_DIVISOR: f32 = F32_MAX_PRECISE as f32;
17pub const ALPHANUMERIC: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
18
19/// Trait for RNGs that provide cryptographically secure data.
20pub trait SecureYARandGenerator: YARandGenerator {
21 /// Fills `dst` with random data, which is safe to be used in cryptographic contexts.
22 ///
23 /// # Examples
24 ///
25 /// ```
26 /// use ya_rand::*;
27 ///
28 /// let mut rng = new_rng_secure();
29 /// let mut data = [0; 1738];
30 /// rng.fill_bytes(&mut data);
31 /// assert!(data.into_iter().any(|v| v != 0));
32 /// ```
33 fn fill_bytes(&mut self, dst: &mut [u8]);
34
35 /// Fills `dst` with random data, which is safe to be used in cryptographic contexts.
36 ///
37 /// Differs from [`SecureYARandGenerator::fill_bytes`] in that the underlying type of `dst`
38 /// doesn't need to be any specific type.
39 ///
40 /// # Examples
41 ///
42 /// ```
43 /// use ya_rand::*;
44 ///
45 /// #[repr(C)]
46 /// #[derive(Clone, Copy, Default, PartialEq, Eq)]
47 /// struct NotAByte {
48 /// x: u16,
49 /// y: u32,
50 /// z: u64,
51 /// }
52 ///
53 /// let mut rng = new_rng_secure();
54 /// let zero_value = NotAByte::default();
55 /// let mut data = [zero_value; 69];
56 /// unsafe {
57 /// rng.fill_raw(&mut data);
58 /// }
59 /// assert!(data.into_iter().any(|v| v != zero_value));
60 /// ```
61 ///
62 /// # Safety
63 ///
64 /// `T` must be valid as nothing more than a collection of bytes.
65 /// Integer types are the simplest example of this, but structs of integer types
66 /// generally should fall under the same umbrella.
67 #[inline]
68 unsafe fn fill_raw<T>(&mut self, dst: &mut [T]) {
69 // SAFETY: The caller has promised not to be a fucking dumbass.
70 let dst_as_bytes = unsafe {
71 let data = dst.as_mut_ptr().cast();
72 let len = dst.len() * size_of::<T>();
73 from_raw_parts_mut(data, len)
74 };
75 self.fill_bytes(dst_as_bytes);
76 }
77
78 /// Generates a random `String` with length `len`, using the provided
79 /// `Encoder` to determine character set and minimum secure length. Since
80 /// character sets can only contain valid ascii values, the length of the created
81 /// `String` reprensents both the size of the `String` in bytes, and the
82 /// amount of characters it contains.
83 ///
84 /// Returns `None` when `len` is less than what the encoder declares to be secure.
85 ///
86 /// All provided encoders are accessible through [`crate::ya_rand_encoding`].
87 /// Users wishing to implement their own encoding must do so through the
88 /// [`Encoder`](crate::ya_rand_encoding::Encoder) trait.
89 ///
90 /// Originally inspired by golang's addition of [`rand.Text`] in release 1.24,
91 /// but altered to be encoding/length generic and unbiased for non-trivial bases.
92 ///
93 /// [`rand.Text`]:
94 /// https://cs.opensource.google/go/go/+/refs/tags/go1.24.0:src/crypto/rand/text.go
95 ///
96 /// # Examples
97 ///
98 /// ```
99 /// use ya_rand::*;
100 /// use ya_rand_encoding::Base16Lowercase as B16L;
101 ///
102 /// const LEN: usize = 420;
103 /// let mut rng = new_rng_secure();
104 /// // Safe to unwrap because 420 is greater than 32, which
105 /// // is the minimum secure length of `Base16Lowercase`.
106 /// let hex_string = rng.text::<B16L>(LEN).unwrap();
107 /// assert!(hex_string.len() == LEN);
108 /// assert!(hex_string.bytes().count() == LEN);
109 /// assert!(hex_string.chars().count() == LEN);
110 /// for c in hex_string.bytes() {
111 /// assert!(
112 /// (b'0' <= c && c <= b'9') ||
113 /// (b'a' <= c && c <= b'f')
114 /// );
115 /// }
116 /// ```
117 #[cfg(feature = "alloc")]
118 fn text<E: Encoder>(&mut self, len: usize) -> Option<String> {
119 match len >= E::MIN_LEN {
120 true => Some({
121 const BYTE_VALUES: usize = 1 << u8::BITS;
122 // SAFETY: u8's are a trivial type and we pwomise to
123 // always overwrite all of them UwU.
124 let mut data = unsafe { Box::new_uninit_slice(len).assume_init().into_vec() };
125 // This branch is evaluated at compile time, so concrete
126 // implementations in final binaries will only have the
127 // contents of the branch suitable for the encoder used.
128 if BYTE_VALUES % E::CHARSET.len() == 0 {
129 self.fill_bytes(&mut data);
130 // Directly map each random u8 to a character in the set.
131 // Since this branch is only reachable when length is a power
132 // of two, the modulo gets optimized out and the whole thing
133 // gets vectorized. The assembly for this branch is
134 // absolutely beautiful.
135 // This approach is extremely efficient, but only produces
136 // unbiased random sequences when the length of the current
137 // `CHARSET` is divisible by the amount of possible u8 values,
138 // which is why we need a fallback approach.
139 for d in &mut data {
140 let random_index = *d as usize;
141 *d = E::CHARSET[random_index % E::CHARSET.len()];
142 }
143 } else {
144 // Alternative approach that's potentially much slower,
145 // but always produces unbiased results.
146 // The unwrap gets optimized out since rustc can see that
147 // `E::CHARSET` has a non-zero length.
148 data.fill_with(|| *self.choose(E::CHARSET).unwrap());
149 }
150 // SAFETY: All provided encoders only use ascii values, and
151 // custom `Encoder` implementations agree to do the same when
152 // implementing the trait.
153 unsafe { String::from_utf8_unchecked(data) }
154 }),
155 false => None,
156 }
157 }
158}
159
160/// Trait for RNGs that can be created from a user-provided seed.
161pub trait SeedableYARandGenerator: YARandGenerator + Default {
162 /// Creates a generator from the output of an internal PRNG,
163 /// which is itself seeded from `seed`.
164 ///
165 /// As a rule: unless you are **absolutely certain** that you need to manually
166 /// seed a generator, you don't.
167 /// Instead, use [`crate::new_rng`] when you need to create a new instance.
168 ///
169 /// If you have a scenario where you really do need a set seed, prefer to use the
170 /// `Default` implementation of the desired generator.
171 ///
172 /// # Examples
173 ///
174 /// ```
175 /// use ya_rand::*;
176 ///
177 /// let mut rng1 = ShiroRng::new_with_seed(0);
178 /// // Default initialization is just a shortcut for explicitly seeding with 0.
179 /// let mut rng2 = ShiroRng::default();
180 /// assert!(rng1 == rng2);
181 ///
182 /// let result1 = rng1.u64();
183 /// let result2 = rng2.u64();
184 /// assert!(result1 == result2);
185 /// ```
186 fn new_with_seed(seed: u64) -> Self;
187}
188
189/// Base trait that all RNGs must implement.
190pub trait YARandGenerator: Sized {
191 /// Creates a generator using randomness provided by the OS.
192 ///
193 /// Unlike [`YARandGenerator::new`], which will panic on failure, `try_new`
194 /// propagates the error-handling responsibility to the user. But the probability
195 /// of your operating systems RNG failing is absurdly low, and in the rare case that it
196 /// does fail, that's not really an issue most users are going to be able to address.
197 ///
198 /// Stick to using [`crate::new_rng`], unless you really need a generator of a
199 /// different type (you probably don't), then use `new` on your desired type.
200 fn try_new() -> Result<Self, getrandom::Error>;
201
202 /// Returns a uniformly distributed `u64` in the interval [0, 2<sup>64</sup>).
203 fn u64(&mut self) -> u64;
204
205 /// Creates a generator using randomness provided by the OS.
206 ///
207 /// It is recommended to use the top-level [`crate::new_rng`] instead
208 /// of calling this function on a specific generator type.
209 ///
210 /// # Safety
211 ///
212 /// This function will panic if your OS rng fails to provide enough entropy.
213 /// But this is extremely unlikely, and unless you're working at the kernel level it's
214 /// not something you should ever be concerned with.
215 ///
216 /// Since Windows 10 this function is infallible, thanks to modern Windows versions adopting
217 /// a user-space cryptographic architecture that can't fail during runtime.
218 ///
219 /// # Examples
220 ///
221 /// ```
222 /// use ya_rand::*;
223 ///
224 /// // Recommended usage
225 /// let mut rng1 = new_rng();
226 /// // More explicit
227 /// let mut rng2 = ShiroRng::new();
228 /// // Even more explicit
229 /// let mut rng3 = Xoshiro256pp::new();
230 /// // Since these are all created using OS entropy, the odds of
231 /// // their initial states colliding is vanishingly small.
232 /// assert!(rng1 != rng2);
233 /// assert!(rng1 != rng3);
234 /// assert!(rng2 != rng3);
235 /// ```
236 #[inline]
237 fn new() -> Self {
238 Self::try_new().expect(
239 "WARNING: retrieving random data from the operating system should never fail; \
240 something has gone terribly wrong",
241 )
242 }
243
244 /// Returns a uniformly distributed `u32` in the interval [0, 2<sup>32</sup>).
245 #[inline]
246 fn u32(&mut self) -> u32 {
247 self.bits(u32::BITS) as u32
248 }
249
250 /// Returns a uniformly distributed `u16` in the interval [0, 2<sup>16</sup>).
251 #[inline]
252 fn u16(&mut self) -> u16 {
253 self.bits(u16::BITS) as u16
254 }
255
256 /// Returns a uniformly distributed `u8` in the interval [0, 2<sup>8</sup>).
257 #[inline]
258 fn u8(&mut self) -> u8 {
259 self.bits(u8::BITS) as u8
260 }
261
262 /// Returns a uniformly distributed `u64` in the interval [0, 2<sup>`bit_count`</sup>).
263 ///
264 /// The value of `bit_count` is clamped to 64.
265 #[inline]
266 fn bits(&mut self, bit_count: u32) -> u64 {
267 self.u64() >> (u64::BITS - bit_count.min(u64::BITS))
268 }
269
270 /// A simple coinflip, returning a `bool` that has a 50% chance of being true.
271 ///
272 /// # Examples
273 ///
274 /// ```
275 /// use ya_rand::*;
276 ///
277 /// const ITERATIONS: u64 = 1 << 24;
278 /// let mut rng = new_rng();
279 /// let mut yes: u64 = 0;
280 /// let mut no: u64 = 0;
281 /// for _ in 0..ITERATIONS {
282 /// if rng.bool() {
283 /// yes += 1;
284 /// } else {
285 /// no += 1;
286 /// }
287 /// }
288 /// // We expect the difference to be within ~5%.
289 /// let THRESHOLD: u64 = ITERATIONS / 20;
290 /// assert!(yes.abs_diff(no) <= THRESHOLD);
291 /// ```
292 #[inline]
293 fn bool(&mut self) -> bool {
294 // Compiles to a single "shr 63" instruction.
295 self.bits(1) == 1
296 }
297
298 /// Returns a uniformly distributed `u64` in the interval [0, `max`).
299 ///
300 /// Using [`YARandGenerator::bits`] when `max` happens to be a power of 2
301 /// will be significantly faster.
302 ///
303 /// # Examples
304 ///
305 /// ```
306 /// use ya_rand::*;
307 ///
308 /// let mut rng = new_rng();
309 /// // Special case: bound of 0 always returns 0.
310 /// assert!(rng.bound(0) == 0);
311 /// for i in 1..=2000 {
312 /// for _ in 0..(i * 2) {
313 /// assert!(rng.bound(i) < i);
314 /// }
315 /// }
316 /// ```
317 #[inline]
318 fn bound(&mut self, max: u64) -> u64 {
319 // Lemire's nearly divisionless method: https://arxiv.org/abs/1805.10941.
320 let (mut high, mut low) = wide_mul(self.u64(), max);
321 match low < max {
322 false => {}
323 true => {
324 // The dreaded division.
325 let threshold = max.wrapping_neg() % max;
326 while low < threshold {
327 (high, low) = wide_mul(self.u64(), max);
328 }
329 }
330 }
331 debug_assert!(
332 (max != 0 && high < max) || high == 0,
333 "BUG: assertion should be unreachable"
334 );
335 high
336 }
337
338 /// Returns a uniformly distributed `u64` in the interval \[0, `max`\].
339 ///
340 /// It is expected that `max` < `u64::MAX`.
341 ///
342 /// # Examples
343 ///
344 /// ```
345 /// use ya_rand::*;
346 ///
347 /// let mut rng = new_rng();
348 /// for i in 0..=2000 {
349 /// for _ in 0..(i * 2) {
350 /// assert!(rng.bound_inclusive(i) <= i);
351 /// }
352 /// }
353 /// ```
354 #[inline]
355 fn bound_inclusive(&mut self, max: u64) -> u64 {
356 self.bound(max + 1)
357 }
358
359 /// Returns a uniformly distributed `i64` in the interval [`min`, `max`)
360 ///
361 /// It is expected that `min` < `max`.
362 #[inline]
363 fn range(&mut self, min: i64, max: i64) -> i64 {
364 let delta = max.abs_diff(min);
365 (self.bound(delta) as i64) + min
366 }
367
368 /// Returns a uniformly distributed `i64` in the interval \[`min`, `max`\]
369 ///
370 /// It is expected that `min` <= `max` and `max` < `u64::MAX`.
371 #[inline]
372 fn range_inclusive(&mut self, min: i64, max: i64) -> i64 {
373 self.range(min, max + 1)
374 }
375
376 /// Returns a uniformly distributed `f64` in the interval [0.0, 1.0).
377 #[inline]
378 fn f64(&mut self) -> f64 {
379 self.bits(F64_MANT) as f64 / F64_DIVISOR
380 }
381
382 /// Returns a uniformly distributed `f32` in the interval [0.0, 1.0).
383 #[inline]
384 fn f32(&mut self) -> f32 {
385 self.bits(F32_MANT) as f32 / F32_DIVISOR
386 }
387
388 /// Returns a uniformly distributed `f64` in the interval (0.0, 1.0].
389 #[inline]
390 fn f64_nonzero(&mut self) -> f64 {
391 // Interval of (0, 2^53]
392 let nonzero = self.bits(F64_MANT) + 1;
393 nonzero as f64 / F64_DIVISOR
394 }
395
396 /// Returns a uniformly distributed `f32` in the interval (0.0, 1.0].
397 #[inline]
398 fn f32_nonzero(&mut self) -> f32 {
399 // Interval of (0, 2^24]
400 let nonzero = self.bits(F32_MANT) + 1;
401 nonzero as f32 / F32_DIVISOR
402 }
403
404 /// Returns a uniformly distributed `f64` in the interval (-1.0, 1.0).
405 #[inline]
406 fn f64_wide(&mut self) -> f64 {
407 // This approach is faster than using Generator::range.
408 const BITS: u32 = F64_MANT + 1;
409 const OFFSET: i64 = F64_MAX_PRECISE as i64;
410 let mut x: i64;
411 loop {
412 // Start with an interval of [0, 2^54)
413 x = self.bits(BITS) as i64;
414 // Interval is now (0, 2^54)
415 match x != 0 {
416 true => break,
417 false => {}
418 }
419 }
420 // Shift interval to (-2^53, 2^53)
421 x -= OFFSET;
422 x as f64 / F64_DIVISOR
423 }
424
425 /// Returns a uniformly distributed `f32` in the interval (-1.0, 1.0).
426 #[inline]
427 fn f32_wide(&mut self) -> f32 {
428 // This approach is faster than using Generator::range.
429 const BITS: u32 = F32_MANT + 1;
430 const OFFSET: i64 = F32_MAX_PRECISE as i64;
431 let mut x: i64;
432 loop {
433 // Start with an interval of [0, 2^25)
434 x = self.bits(BITS) as i64;
435 // Interval is now (0, 2^25)
436 match x != 0 {
437 true => break,
438 false => {}
439 }
440 }
441 // Shift interval to (-2^24, 2^24)
442 x -= OFFSET;
443 x as f32 / F32_DIVISOR
444 }
445
446 /// Returns two indepedent and normally distributed `f64` values with
447 /// a `mean` of `0.0` and a `stddev` of `1.0`.
448 #[cfg(feature = "std")]
449 fn f64_normal(&mut self) -> (f64, f64) {
450 // Marsaglia polar method.
451 // TLDR: It projects a point within the unit
452 // circle onto the unit radius.
453 let mut x: f64;
454 let mut y: f64;
455 let mut s: f64;
456 loop {
457 x = self.f64_wide();
458 y = self.f64_wide();
459 s = (x * x) + (y * y);
460 // Reroll if `s` does not lie **within** the unit circle.
461 match s < 1.0 && s != 0.0 {
462 true => break,
463 false => {}
464 }
465 }
466 let t = (2.0 * s.ln().abs() / s).sqrt();
467 (x * t, y * t)
468 }
469
470 /// Returns two indepedent and normally distributed `f64` values with
471 /// user-defined `mean` and `stddev`.
472 ///
473 /// It is expected that `stddev.abs()` != `0.0`.
474 #[cfg(feature = "std")]
475 #[inline]
476 fn f64_normal_distribution(&mut self, mean: f64, stddev: f64) -> (f64, f64) {
477 let (x, y) = self.f64_normal();
478 (x.mul_add(stddev, mean), y.mul_add(stddev, mean))
479 }
480
481 /// Returns an exponentially distributed `f64` with `lambda` of `1.0`.
482 #[cfg(feature = "std")]
483 #[inline]
484 fn f64_exponential(&mut self) -> f64 {
485 // Using abs() instead of negating the result of ln() to
486 // eliminate the possibility of ever returning -0.0.
487 self.f64_nonzero().ln().abs()
488 }
489
490 /// Returns an exponentially distributed `f64` with user-defined `lambda`.
491 ///
492 /// It is expected that `lambda.abs()` != `0.0`.
493 #[cfg(feature = "std")]
494 #[inline]
495 fn f64_exponential_lambda(&mut self, lambda: f64) -> f64 {
496 self.f64_exponential() / lambda
497 }
498
499 /// Returns a randomly chosen item from the iterator of `collection`.
500 ///
501 /// Returns `None` when the length of the iterator is zero.
502 ///
503 /// # Examples
504 ///
505 /// ```
506 /// use ya_rand::*;
507 ///
508 /// const SIZE: usize = 1738;
509 /// let mut rng = new_rng();
510 /// let mut v = [0; SIZE];
511 /// for i in 0..SIZE {
512 /// v[i] = i;
513 /// }
514 /// let (top, bottom) = v.split_at(v.len() / 2);
515 ///
516 /// // Sanity check.
517 /// let random_item = rng.choose(&v).expect("vector `v` is not empty");
518 /// assert!(v.contains(random_item));
519 ///
520 /// // Choose `random_item` from the top half of the array.
521 /// let random_item = rng.choose(top).expect("still not empty");
522 /// assert!(top.contains(random_item) == true);
523 ///
524 /// // We're looking in the bottom half so we won't find the
525 /// // `random_item` from the top half.
526 /// assert!(bottom.contains(random_item) == false);
527 /// ```
528 #[inline]
529 fn choose<C>(&mut self, collection: C) -> Option<C::Item>
530 where
531 C: IntoIterator,
532 C::IntoIter: ExactSizeIterator,
533 {
534 let mut iter = collection.into_iter();
535 let len = iter.len();
536 match len != 0 {
537 true => Some({
538 let idx = self.bound(len as u64) as usize;
539 // SAFETY: Since `bound` always returns a value less than
540 // it's input, `nth` will never return `None`.
541 unsafe { iter.nth(idx).unwrap_unchecked() }
542 }),
543 false => None,
544 }
545 }
546
547 /// Returns a randomly selected ASCII character from the pool of:
548 ///
549 /// `'A'..='Z'`, and`'a'..='z'`
550 #[inline]
551 fn ascii_alphabetic(&mut self) -> char {
552 *self.choose(&ALPHANUMERIC[..52]).unwrap() as char
553 }
554
555 /// Returns a randomly selected ASCII character from the pool of:
556 ///
557 /// `'A'..='Z'`
558 #[inline]
559 fn ascii_uppercase(&mut self) -> char {
560 *self.choose(&ALPHANUMERIC[..26]).unwrap() as char
561 }
562
563 /// Returns a randomly selected ASCII character from the pool of:
564 ///
565 /// `'a'..='z'`
566 #[inline]
567 fn ascii_lowercase(&mut self) -> char {
568 *self.choose(&ALPHANUMERIC[26..52]).unwrap() as char
569 }
570
571 /// Returns a randomly selected ASCII character from the pool of:
572 ///
573 /// `'A'..='Z'`, `'a'..='z'`, and `'0'..='9'`
574 #[inline]
575 fn ascii_alphanumeric(&mut self) -> char {
576 *self.choose(&ALPHANUMERIC[..]).unwrap() as char
577 }
578
579 /// Returns a randomly selected ASCII character from the pool of:
580 ///
581 /// `'0'..='9'`
582 #[inline]
583 fn ascii_digit(&mut self) -> char {
584 *self.choose(&ALPHANUMERIC[52..]).unwrap() as char
585 }
586
587 /// Performs a Fisher-Yates shuffle on the contents of `slice`.
588 ///
589 /// This implementation is the modern variant introduced by
590 /// Richard Durstenfeld. It is in-place and O(n).
591 ///
592 /// # Examples
593 ///
594 /// ```
595 /// use ya_rand::*;
596 ///
597 /// let mut rng = new_rng();
598 /// let mut data = [0; 1738];
599 /// for i in 0..data.len() {
600 /// data[i] = i;
601 /// }
602 /// assert!(data.is_sorted() == true);
603 ///
604 /// rng.shuffle(&mut data);
605 /// assert!(data.is_sorted() == false);
606 /// ```
607 fn shuffle<T>(&mut self, slice: &mut [T]) {
608 let slice_ptr = slice.as_mut_ptr();
609 for i in (1..slice.len()).rev() {
610 let j = self.bound_inclusive(i as u64) as usize;
611 // SAFETY: Index 'i' will always be in bounds because it's
612 // bounded by slice length; index 'j' will always be
613 // in bounds because it's bounded by 'i'.
614 unsafe {
615 swap(slice_ptr.add(i), slice_ptr.add(j));
616 }
617 }
618 }
619
620 /// Clones `slice` into a new `Vec`, performs a Fisher-Yates
621 /// shuffle on it's contents, and returns the result.
622 ///
623 /// See [`YARandGenerator::shuffle`] for details/examples.
624 #[cfg(feature = "alloc")]
625 #[inline]
626 fn shuffle_cloned<T: Clone>(&mut self, slice: &[T]) -> Vec<T> {
627 let mut v = slice.to_vec();
628 self.shuffle(&mut v);
629 v
630 }
631}