rand_unique/
sequence.rs

1use crate::builder::{QuadraticResidue, RandomSequenceBuilder};
2
3/// Generate a deterministic pseudo-random sequence of unique numbers.
4///
5/// Not cryptographically secure.
6///
7/// Properties:
8/// - The sequence is deterministic and repeatable.
9/// - The sequence will only include each number once (every index is unique).
10/// - Computing the value for any random index in the sequence is an O(1) operation.
11///
12/// Based on the article by @preshing:
13/// Article: http://preshing.com/20121224/how-to-generate-a-sequence-of-unique-random-integers/
14/// Source: https://github.com/preshing/RandomSequence/blob/master/randomsequence.h
15#[derive(Debug, Clone)]
16pub struct RandomSequence<T>
17where
18    T: QuadraticResidue
19{
20    /// The config/builder holds the parameters that define the sequence.
21    pub config: RandomSequenceBuilder<T>,
22
23    /// Internal iterator-only state.
24    pub(crate) start_index: T,
25    pub(crate) current_index: T,
26    pub(crate) intermediate_offset: T,
27
28    /// The end marker, required for the ExactSizeIterator so that we terminate correctly.
29    pub(crate) ended: bool,
30}
31
32impl<T> RandomSequence<T>
33where
34    T: QuadraticResidue
35{
36    /// Get the next element in the sequence.
37    #[inline]
38    pub fn next(&mut self) -> Option<T> {
39        let next = self.n_internal(self.start_index.wrapping_add(&self.current_index));
40        self.current_index = match self.current_index.checked_add(&T::one()) {
41            Some(v) => {
42                self.ended = false;
43                v
44            },
45            None => {
46                if !self.ended {
47                    self.ended = true;
48                    self.current_index
49                } else {
50                    return None
51                }
52            },
53        };
54        Some(next)
55    }
56
57    /// Get the next element in the sequence, cycling the sequence once we reach the end.
58    ///
59    /// This will ignore the internal [RandomSequence::ended] marker, and potentially confuse an
60    /// exact size iterator if it had reached the end.
61    #[inline]
62    pub fn wrapping_next(&mut self) -> T {
63        let next = self.n_internal(self.start_index.wrapping_add(&self.current_index));
64        self.current_index = self.current_index.wrapping_add(&T::one());
65        next
66    }
67
68    /// Get the previous element in the sequence.
69    #[inline]
70    pub fn prev(&mut self) -> Option<T> {
71        // decrement then compute, opposite to next()
72        self.current_index = match self.current_index.checked_sub(&T::one()) {
73            Some(v) => v,
74            None => return None,
75        };
76        self.ended = false;
77        Some(self.n_internal(self.start_index.wrapping_add(&self.current_index)))
78    }
79
80    /// Get the previous element in the sequence, cycling the sequence once we reach the start.
81    #[inline]
82    pub fn wrapping_prev(&mut self) -> T {
83        // decrement then compute, opposite to next()
84        self.current_index = self.current_index.wrapping_sub(&T::one());
85        self.n_internal(self.start_index.wrapping_add(&self.current_index))
86    }
87
88    /// Get the nth element in the sequence.
89    #[inline]
90    pub fn n(&self, index: T) -> T {
91        let actual_index = self.start_index.wrapping_add(&index);
92        self.n_internal(actual_index)
93    }
94
95    /// Get the nth element in the sequence, but using the absolute index rather than relative to `start_index`.
96    ///
97    /// `qpr(qpr(index + intermediate_offset) ^ intermediate_xor)`
98    #[inline(always)]
99    fn n_internal(&self, index: T) -> T {
100        let inner_residue = self.config.permute_qpr(index).wrapping_add(&self.intermediate_offset);
101        self.config.permute_qpr(inner_residue ^ self.config.intermediate_xor)
102    }
103
104    /// Get the current position in the sequence. Will return `None` if the sequence has been exhausted.
105    #[inline]
106    pub fn index(&self) -> Option<T> {
107        match self.ended {
108            false => Some(self.current_index),
109            true => None,
110        }
111    }
112
113    /// Check if this sequence has been exhausted.
114    #[inline]
115    pub fn exhausted(&self) -> bool {
116        self.ended
117    }
118
119    /// Set the index for the iterator.
120    ///
121    /// If the iterator was exhausted, this will reset it to the index set.
122    #[inline]
123    pub fn set_index(&mut self, index: T) {
124        self.current_index = index;
125        self.ended = false;
126    }
127}
128
129macro_rules! impl_unsized_iterator {
130    ($T:ident) => {
131        impl Iterator for RandomSequence<$T> {
132            type Item = $T;
133
134            #[inline]
135            fn next(&mut self) -> Option<Self::Item> {
136                self.next()
137            }
138
139            #[inline]
140            fn size_hint(&self) -> (usize, Option<usize>) {
141                ($T::MAX as usize - self.current_index as usize, None)
142            }
143        }
144    };
145}
146
147macro_rules! impl_exact_size_iterator {
148    ($T:ident) => {
149        impl Iterator for RandomSequence<$T> {
150            type Item = $T;
151
152            #[inline]
153            fn next(&mut self) -> Option<Self::Item> {
154                self.next()
155            }
156
157            #[inline]
158            fn size_hint(&self) -> (usize, Option<usize>) {
159                ($T::MAX as usize + 1 - self.current_index as usize, Some($T::MAX as usize + 1 - self.current_index as usize))
160            }
161        }
162
163        impl ExactSizeIterator for RandomSequence<$T> {}
164    };
165}
166
167// Can only fit exact size iterators in types smaller than usize. As usize will have usize+1 elements.
168impl_exact_size_iterator!(u8);
169impl_exact_size_iterator!(u16);
170#[cfg(target_pointer_width = "64")]
171impl_exact_size_iterator!(u32);
172#[cfg(target_pointer_width = "32")]
173impl_unsized_iterator!(u32);
174impl_unsized_iterator!(u64);
175impl_unsized_iterator!(usize);
176
177impl<T> DoubleEndedIterator for RandomSequence<T>
178where
179    T: QuadraticResidue,
180    RandomSequence<T>: Iterator<Item = T>,
181{
182    #[inline]
183    fn next_back(&mut self) -> Option<Self::Item> {
184        self.prev()
185    }
186}
187
188impl<T> From<RandomSequenceBuilder<T>> for RandomSequence<T>
189where
190    T: QuadraticResidue,
191    RandomSequence<T>: Iterator<Item = T>,
192{
193    #[inline]
194    fn from(value: RandomSequenceBuilder<T>) -> Self {
195        value.into_iter()
196    }
197}
198
199#[cfg(test)]
200mod tests {
201    use std::collections::{HashMap, HashSet};
202    use std::vec::Vec;
203
204    use rand::rngs::SysRng;
205    use statrs::distribution::{ChiSquared, ContinuousCDF};
206
207    use super::*;
208
209    fn is_send<T: Send>() {}
210    fn is_sync<T: Sync>() {}
211
212    macro_rules! test_sequence {
213        ($name:ident, $type:ident, $check:literal) => {
214            #[test]
215            fn $name() {
216                let config = RandomSequenceBuilder::<$type>::new(0, 0);
217                let sequence = config.into_iter();
218
219                for (i, num) in std::iter::zip(0..10, sequence.clone()) {
220                    assert_eq!(sequence.n(i as $type), num);
221                }
222
223                for (i, num) in std::iter::zip(0..10, sequence.clone().rev()) {
224                    assert_eq!(sequence.n($type::MAX.wrapping_sub(i as $type)), num);
225                }
226
227                // check the exact size iterator ends correctly for u8 and u16
228                if ($type::MAX as usize) < $check {
229                    let nums_vec: Vec<$type> = config.into_iter().take($check + 10).collect();
230                    assert_eq!(nums_vec.len(), ($type::MAX as usize).saturating_add(1));
231                }
232
233                // check that we see each value only once
234                let nums: HashSet<$type> = config.into_iter().take($check).collect();
235                assert_eq!(nums.len(), $check);
236
237                // check exhaustion
238                {
239                    let mut sequence = config.into_iter();
240
241                    // initial index = 0
242                    assert!(!sequence.exhausted());
243                    assert_eq!(sequence.index(), Some(0));
244                    let first = sequence.next().unwrap();
245
246                    // final index
247                    sequence.set_index($type::MAX);
248                    assert!(!sequence.exhausted());
249                    assert_eq!(sequence.index(), Some($type::MAX));
250                    let last = sequence.next().unwrap();
251
252                    // exhausted
253                    assert!(sequence.exhausted());
254                    assert_eq!(sequence.index(), None);
255                    assert!(sequence.next().is_none());
256
257                    // reset
258                    sequence.set_index($type::MAX);
259                    assert!(!sequence.exhausted());
260                    assert_eq!(sequence.index(), Some($type::MAX));
261                    assert_eq!(sequence.next(), Some(last));
262                    assert!(sequence.exhausted());  // gets exhausted again
263
264                    // reset to 0
265                    sequence.set_index(0);
266                    assert!(!sequence.exhausted());
267                    assert_eq!(sequence.index(), Some(0));
268                    assert_eq!(sequence.next(), Some(first));
269                }
270
271                // check sequence is send and sync (although index won't be synced between threads)
272                is_send::<RandomSequence<$type>>();
273                is_sync::<RandomSequence<$type>>();
274            }
275        };
276    }
277
278    test_sequence!(test_u8_sequence, u8, 256);
279    test_sequence!(test_u16_sequence, u16, 65536);
280    test_sequence!(test_u32_sequence, u32, 100_000);
281    test_sequence!(test_u64_sequence, u64, 100_000);
282    test_sequence!(test_usize_sequence, usize, 100_000);
283
284    macro_rules! test_exact_size_iterator {
285        ($name:ident, $type:ident) => {
286            #[test]
287            fn $name() {
288                let config = RandomSequenceBuilder::<$type>::new(0, 0);
289                let mut sequence = config.clone().into_iter();
290                assert_eq!(sequence.len(), $type::MAX as usize + 1);  // eg. 256 items, but u8::MAX = 255
291                sequence.next();
292                assert_eq!(sequence.len(), $type::MAX as usize);
293                sequence.next();
294                assert_eq!(sequence.len(), $type::MAX as usize - 1);
295                sequence.next();
296                assert_eq!(sequence.len(), $type::MAX as usize - 2);
297                sequence.prev();
298                assert_eq!(sequence.len(), $type::MAX as usize - 1);
299
300                sequence.next();
301                sequence.next();
302                sequence.next();
303                assert_eq!(sequence.len(), $type::MAX as usize - 4);
304
305                // don't collect a u32, only test this on u8 and u16
306                if sequence.len() <= u16::MAX as usize + 1 {
307                    let remaining_len = sequence.len();
308                    let remaining_items: Vec<_> = sequence.collect();
309                    assert_eq!(remaining_len, remaining_items.len());
310
311                    // double check a fresh sequence
312                    let sequence = config.into_iter();
313                    assert_eq!(sequence.len(), $type::MAX as usize + 1);
314                    let all_items: Vec<_> = sequence.collect();
315                    assert_eq!(all_items.len(), $type::MAX as usize + 1);
316                    assert_eq!(all_items.len(), remaining_items.len() + 5);
317                }
318            }
319        };
320    }
321
322    test_exact_size_iterator!(test_u8_exact_size_iterator, u8);
323    test_exact_size_iterator!(test_u16_exact_size_iterator, u16);
324    #[cfg(target_pointer_width = "64")]
325    test_exact_size_iterator!(test_u32_exact_size_iterator, u32);
326
327    macro_rules! test_distribution {
328        ($name:ident, $type:ident, $check:literal) => {
329            #[ignore]  // ChiSquared p value is too unreliable
330            #[test]
331            fn $name() {
332                const BUCKETS: usize = 100;
333                let config = RandomSequenceBuilder::<$type>::rand(&mut SysRng);
334
335                // compute a normalised histogram over the sequence with BUCKETS buckets, where each bucket value
336                // is the percentage of values that fall into this bucket
337                let mut data_buckets: HashMap<usize, usize> = HashMap::with_capacity(BUCKETS + 1);
338                config
339                    .into_iter()
340                    .take($check)
341                    .map(|i| ((i as f64 / $type::MAX as f64) * BUCKETS as f64) as usize)
342                    .for_each(|i| *data_buckets.entry(i).or_insert(0) += 1);
343                let data_buckets: Vec<f64> = (0..=BUCKETS)
344                    .map(|i| *data_buckets.get(&i).unwrap_or(&0) as f64)
345                    .collect();
346
347                // compute the probability of each bucket being hit, assuming a uniform distribution.
348                // careful for u8 where we have 256 for only 100 buckets; and so some buckets have 2 vs 3 expected values,
349                // as this represents the percentage of values that should fall into each bucket assuming perfectly uniform.
350                let mut uniform_buckets: Vec<f64> = (0..BUCKETS)
351                    .map(|_| ($check as f64 / BUCKETS as f64))
352                    .collect();
353                uniform_buckets.push($check as f64 / $type::MAX as f64); // last bucket for value=$type::MAX
354
355                // compute chi-squared statistic
356                assert_eq!(data_buckets.len(), uniform_buckets.len(), "Data and uniform buckets logic issue.");
357                let chi_squared = std::iter::zip(data_buckets.iter(), uniform_buckets.iter())
358                    .map(|(x, e)| (x - e).powi(2) / e)
359                    .sum::<f64>();
360
361                // compute p_value from chi-squared statistic
362                let chi_dist = ChiSquared::new((BUCKETS - 1) as f64).unwrap();
363                let p_value = 1.0 - chi_dist.cdf(chi_squared);
364
365                // FIXME: choose a better test, because this doesn't strictly confirm the uniform distribution
366                //   and there is a suspiciously large amount of variance in the p_values between test runs.
367                // p_value <= 0.05 would say with 95% certainty that this distribution is _not_ uniform
368                assert!(p_value > 0.05, "Unexpectedly rejected the null hypothesis with high probability. stat: {}, p: {}", chi_squared, p_value);
369            }
370        };
371    }
372
373    test_distribution!(test_u8_distribution, u8, 256);
374    test_distribution!(test_u16_distribution, u16, 65536);
375    test_distribution!(test_u32_distribution, u32, 100_000);
376    test_distribution!(test_u64_distribution, u64, 100_000);
377    test_distribution!(test_usize_distribution, usize, 100_000);
378}
rand_unique/sequence.rs

rand_unique/
sequence.rs