1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
//! Reservoir sampling: collect a random sample of a known maximum
//! size from an iterator of unknown length.
//!
//! Implements Jeffrey Vitter's Algorithm R (see
//! https://en.wikipedia.org/wiki/Reservoir_sampling)

extern crate rand;

use rand::Rng;

/// Return a random sample of a known maximum size from an iterator of
/// unknown length.
///
/// # Examples
/// 
/// ```
/// # extern crate rand;
/// # extern crate reservoir;
/// # use rand::thread_rng;
/// # use reservoir::sample;
/// # fn main() {
/// let iter = 0..10;
/// 
/// let samples = sample(&mut thread_rng(), 4, iter);
/// 
/// assert_eq!(4, samples.len());
/// assert!(samples.iter().all(|e| *e >= 0 && *e < 10));
/// # }
/// ```
/// 
/// If the sampled iterator contains fewer items than the sample_size,
/// all items are returned.
///
/// ```
/// # extern crate rand;
/// # extern crate reservoir;
/// # use rand::thread_rng;
/// # use reservoir::sample;
/// # fn main() {
/// let iter = 0..10;
/// 
/// let samples : Vec<i32> = sample(&mut thread_rng(), 20, iter);
/// let expected_samples : Vec<i32> = (0..10).collect();
/// 
/// assert_eq!(expected_samples, samples);
/// # }
/// ```

pub fn sample<I, RNG>(rng : &mut RNG, sample_size : usize, iter : I) -> Vec<I::Item>
    where I : Iterator,
          RNG : Rng
{
    let mut samples = Vec::<I::Item>::with_capacity(sample_size);
    sample_into(&mut samples, rng, sample_size, iter);
    samples
}

/// Collect a random sample of a known maximum size from an iterator
/// of unknown length into an existing Vec.
/// 
/// # Examples
///
/// For example:
///
/// ```
/// # extern crate rand;
/// # extern crate reservoir;
/// # use rand::thread_rng;
/// # use reservoir::sample_into;
/// # fn main() {
/// let iter = 0..10;
/// let mut samples : Vec<i32> = Vec::new();
/// 
/// sample_into(&mut samples, &mut thread_rng(), 4, iter);
/// 
/// assert_eq!(4, samples.len());
/// assert!(samples.iter().all(|e| *e >= 0 && *e < 10));
/// # }
/// ```
///
/// Preserves any elements already in the vector:
///
/// ```
/// # extern crate rand;
/// # extern crate reservoir;
/// # use rand::thread_rng;
/// # use reservoir::sample_into;
/// # fn main() {
/// let iter = 0..10;
/// let mut samples : Vec<i32> = vec![99,100];
/// 
/// sample_into(&mut samples, &mut thread_rng(), 4, iter);
/// 
/// assert_eq!(6, samples.len());
/// assert_eq!(99, samples[0]);
/// assert_eq!(100, samples[1]);
/// assert!(samples[2..].iter().all(|e| *e >= 0 && *e < 10));
/// # }
/// ```

pub fn sample_into<I, RNG>(samples : &mut Vec<I::Item>, rng : &mut RNG, sample_size : usize, iter : I)
    where I : Iterator,
          RNG : Rng
{
    let original_length = samples.len();
    let mut count : usize = 0;
    
    for element in iter {
        count += 1;
        
        if count <= sample_size {
            samples.push(element);
        } else {
            let index = rng.gen_range(0, count);
            if index < sample_size {
                samples[original_length+index] = element;
            }
        }
    }
}