1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
use std::arch::x86_64::*;
use crate::_internal::{FSCALE32, FSCALE64};
macro_rules! randi_wide {
(i 32) => {
i64
};
(i 64) => {
i128
};
(u 32) => {
u64
};
(u 64) => {
u128
};
}
macro_rules! impl_rng_trait {
($bits:expr) => {
paste::paste! {
#[doc = concat!("A trait for ", $bits, "-bit random number generators.")]
///
/// # Examples
///
/// ```
#[doc = concat!("use urng::rng::Rng", $bits, ";")]
#[doc = concat!("use urng::rng::Choice", $bits, ";")]
#[doc = concat!("use urng::rng", $bits, "::Xorshift", $bits, ";")]
///
#[doc = concat!("let mut rng = Xorshift", $bits, "::new(1);")]
#[doc = concat!("let val = rng.randi(1, 6);")]
#[doc = concat!("assert!((1..=6).contains(&val));")]
#[doc = concat!("assert!(rng.randf(0.0, 1.0_f", $bits, ") < 1.0);")]
#[doc = concat!("let items = [\"a\", \"b\", \"c\"];")]
#[doc = concat!("assert!(items.contains(rng.choice(&items)));")]
/// ```
pub trait [<Rng $bits>] {
#[doc = concat!("Generates the next random `u", $bits, "` value in the range [0, 2^", $bits, ").")]
fn nextu(&mut self) -> [<u $bits>];
#[doc = concat!("Generates the next random `f", $bits, "` value in the range [0, 1).")]
#[inline(always)]
fn nextf(&mut self) -> [<f $bits>] {
self.nextu() as [<f $bits>] * [<FSCALE $bits>]
}
#[doc = concat!("Generates a random `i", $bits, "` value in the range [min, max].")]
#[inline(always)]
fn randi(&mut self, min: [<i $bits>], max: [<i $bits>]) -> [<i $bits>] {
let range = (max as randi_wide!(i $bits) - min as randi_wide!(i $bits) + 1)
as randi_wide!(u $bits);
((self.nextu() as randi_wide!(u $bits) * range) >> $bits) as [<i $bits>] + min
}
#[doc = concat!("Generates a random `f", $bits, "` value in the range [min, max).")]
#[inline(always)]
fn randf(&mut self, min: [<f $bits>], max: [<f $bits>]) -> [<f $bits>] {
let scale = (max - min) * [<FSCALE $bits>];
(self.nextu() as [<f $bits>] * scale) + min
}
}
pub trait [<Choice $bits>]: [<Rng $bits>] {
/// Returns a random element from a slice.
#[inline(always)]
fn choice<'a, T>(&mut self, choices: &'a [T]) -> &'a T {
let index = self.randi(0, choices.len() as [<i $bits>] - 1);
&choices[index as usize]
}
}
impl<T: [<Rng $bits>] + ?Sized> [<Choice $bits>] for T {}
}
};
}
impl_rng_trait!(32);
impl_rng_trait!(64);
pub trait Rng32V256 {
/// Generates the next random `__m256i` value containing 8 `u32` integers in the range [0, 2^32).
///
/// # Safety
///
/// This function is unsafe because it relies on the caller to ensure that the CPU supports AVX2 instructions. The caller must check for AVX2 support before calling this function to avoid undefined behavior.
unsafe fn nextuv(&mut self) -> __m256i;
/// Generates the next random `__m256` value containing 8 `f32` floats in the range [0.0, 1.0).
///
/// # Safety
///
/// This function is unsafe because it relies on the caller to ensure that the CPU supports AVX2 instructions. The caller must check for AVX2 support before calling this function to avoid undefined behavior.
#[inline]
#[target_feature(enable = "avx2")]
unsafe fn nextfv(&mut self, v_scale: __m256) -> __m256 {
let uv = unsafe { self.nextuv() };
let fv = _mm256_cvtepi32_ps(uv);
_mm256_mul_ps(fv, v_scale)
}
/// Generates a random `__m256i` value containing 8 `i32` integers in the range [v_min, v_min + v_range).
///
/// # Safety
///
/// This function is unsafe because it relies on the caller to ensure that the CPU supports AVX2 instructions. The caller must check for AVX2 support before calling this function to avoid undefined behavior.
#[inline]
#[target_feature(enable = "avx2")]
unsafe fn randiv(&mut self, v_range: __m256i, v_min: __m256i) -> __m256i {
const MERGE_MASK: u8 = 0xAA;
let uv = unsafe { self.nextuv() };
let prod_even = _mm256_mul_epu32(uv, v_range);
let res_even = _mm256_srli_epi64(prod_even, 32);
let v_u32_shifted = _mm256_srli_epi64(uv, 32);
let prod_odd = _mm256_mul_epu32(v_u32_shifted, v_range);
let merged = unsafe { _mm256_mask_blend_epi32(MERGE_MASK, res_even, prod_odd) };
_mm256_add_epi32(merged, v_min)
}
/// Generates a random `__m256` value containing 8 `f32` floats in the range [v_min, v_min + v_mult).
///
/// # Safety
///
/// This function is unsafe because it relies on the caller to ensure that the CPU supports AVX2 instructions. The caller must check for AVX2 support before calling this function to avoid undefined behavior.
#[inline]
#[target_feature(enable = "avx2")]
unsafe fn randfv(&mut self, v_mult: __m256, v_min: __m256) -> __m256 {
let uv = unsafe { self.nextuv() };
let fv = _mm256_cvtepi32_ps(uv);
_mm256_add_ps(_mm256_mul_ps(fv, v_mult), v_min)
}
/// Generates the next random `u32` integers in the range [0, 2^32) and returns them as an array of 8 elements.
#[inline(always)]
fn nextu(&mut self) -> [u32; 8] {
unsafe { std::mem::transmute(self.nextuv()) }
}
/// Generates the next random `f32` floats in the range [0.0, 1.0) and returns them as an array of 8 elements.
#[inline(always)]
fn nextf(&mut self) -> [f32; 8] {
self.nextu().map(|x| x as f32 * FSCALE32)
}
}
pub trait Rng32V512 {
/// Generates the next random `__m512i` value containing 16 `u32` integers in the range [0, 2^32).
///
/// # Safety
///
/// This function is unsafe because it relies on the caller to ensure that the CPU supports AVX-512F instructions. The caller must check for AVX-512F support before calling this function to avoid undefined behavior.
unsafe fn nextuv(&mut self) -> __m512i;
/// Generates the next random `__m512` value containing 16 `f32` floats in the range [0.0, 1.0).
///
/// # Safety
///
/// This function is unsafe because it relies on the caller to ensure that the CPU supports AVX-512F instructions. The caller must check for AVX-512F support before calling this function to avoid undefined behavior.
#[inline]
#[target_feature(enable = "avx512f")]
unsafe fn nextfv(&mut self, v_scale: __m512) -> __m512 {
let uv = unsafe { self.nextuv() };
let fv = _mm512_cvtepu32_ps(uv);
_mm512_mul_ps(fv, v_scale)
}
/// Generates a random `__m512i` value containing 16 `i32` integers in the range [v_min, v_min + v_range).
///
/// # Safety
///
/// This function is unsafe because it relies on the caller to ensure that the CPU supports AVX-512F instructions. The caller must check for AVX-512F support before calling this function to avoid undefined behavior.
#[inline]
#[target_feature(enable = "avx512f")]
unsafe fn randiv(&mut self, v_range: __m512i, v_min: __m512i) -> __m512i {
const MERGE_MASK: u16 = 0xAAAA;
let uv = unsafe { self.nextuv() };
let prod_even = _mm512_mul_epu32(uv, v_range);
let res_even = _mm512_srli_epi64(prod_even, 32);
let v_u32_shifted = _mm512_srli_epi64(uv, 32);
let prod_odd = _mm512_mul_epu32(v_u32_shifted, v_range);
let merged = _mm512_mask_blend_epi32(MERGE_MASK, res_even, prod_odd);
_mm512_add_epi32(merged, v_min)
}
/// Generates a random `__m512` value containing 16 `f32` floats in the range [v_min, v_min + v_mult).
///
/// # Safety
///
/// This function is unsafe because it relies on the caller to ensure that the CPU supports AVX-512F instructions. The caller must check for AVX-512F support before calling this function to avoid undefined behavior.
#[inline]
#[target_feature(enable = "avx512f")]
unsafe fn randfv(&mut self, v_mult: __m512, v_min: __m512) -> __m512 {
let uv = unsafe { self.nextuv() };
let fv = _mm512_cvtepu32_ps(uv);
_mm512_add_ps(_mm512_mul_ps(fv, v_mult), v_min)
}
/// Generates the next random `u32` integers in the range [0, 2^32) and returns them as an array of 16 elements.
#[inline(always)]
fn nextu(&mut self) -> [u32; 16] {
unsafe { std::mem::transmute(self.nextuv()) }
}
/// Generates the next random `f32` floats in the range [0.0, 1.0) and returns them as an array of 16 elements.
#[inline(always)]
fn nextf(&mut self) -> [f32; 16] {
self.nextu().map(|x| x as f32 * FSCALE32)
}
}