1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
//! Reservoir sampling: collect a random sample of a known maximum //! size from an iterator of unknown length. //! //! Implements Jeffrey Vitter's Algorithm R (see //! https://en.wikipedia.org/wiki/Reservoir_sampling) extern crate rand; use rand::Rng; /// Return a random sample of a known maximum size from an iterator of /// unknown length. /// /// # Examples /// /// ``` /// # extern crate rand; /// # extern crate reservoir; /// # use rand::thread_rng; /// # use reservoir::sample; /// # fn main() { /// let iter = 0..10; /// /// let samples = sample(&mut thread_rng(), 4, iter); /// /// assert_eq!(4, samples.len()); /// assert!(samples.iter().all(|e| *e >= 0 && *e < 10)); /// # } /// ``` /// /// If the sampled iterator contains fewer items than the sample_size, /// all items are returned. /// /// ``` /// # extern crate rand; /// # extern crate reservoir; /// # use rand::thread_rng; /// # use reservoir::sample; /// # fn main() { /// let iter = 0..10; /// /// let samples : Vec<i32> = sample(&mut thread_rng(), 20, iter); /// let expected_samples : Vec<i32> = (0..10).collect(); /// /// assert_eq!(expected_samples, samples); /// # } /// ``` pub fn sample<I, RNG>(rng : &mut RNG, sample_size : usize, iter : I) -> Vec<I::Item> where I : Iterator, RNG : Rng { let mut samples = Vec::<I::Item>::with_capacity(sample_size); sample_into(&mut samples, rng, sample_size, iter); samples } /// Collect a random sample of a known maximum size from an iterator /// of unknown length into an existing Vec. /// /// # Examples /// /// For example: /// /// ``` /// # extern crate rand; /// # extern crate reservoir; /// # use rand::thread_rng; /// # use reservoir::sample_into; /// # fn main() { /// let iter = 0..10; /// let mut samples : Vec<i32> = Vec::new(); /// /// sample_into(&mut samples, &mut thread_rng(), 4, iter); /// /// assert_eq!(4, samples.len()); /// assert!(samples.iter().all(|e| *e >= 0 && *e < 10)); /// # } /// ``` /// /// Preserves any elements already in the vector: /// /// ``` /// # extern crate rand; /// # extern crate reservoir; /// # use rand::thread_rng; /// # use reservoir::sample_into; /// # fn main() { /// let iter = 0..10; /// let mut samples : Vec<i32> = vec![99,100]; /// /// sample_into(&mut samples, &mut thread_rng(), 4, iter); /// /// assert_eq!(6, samples.len()); /// assert_eq!(99, samples[0]); /// assert_eq!(100, samples[1]); /// assert!(samples[2..].iter().all(|e| *e >= 0 && *e < 10)); /// # } /// ``` pub fn sample_into<I, RNG>(samples : &mut Vec<I::Item>, rng : &mut RNG, sample_size : usize, iter : I) where I : Iterator, RNG : Rng { let original_length = samples.len(); let mut count : usize = 0; for element in iter { count += 1; if count <= sample_size { samples.push(element); } else { let index = rng.gen_range(0, count); if index < sample_size { samples[original_length+index] = element; } } } }