Skip to main content

sql_splitter/sample/
reservoir.rs

1//! Reservoir sampling implementation (Algorithm R).
2//!
3//! Provides uniform random sampling over a stream of items
4//! with a fixed-size reservoir.
5
6use rand::rngs::StdRng;
7use rand::Rng;
8
9/// Reservoir sampler using Algorithm R.
10///
11/// Maintains a fixed-size sample of items seen so far,
12/// with each item having equal probability of being in the sample.
13#[derive(Debug)]
14pub struct Reservoir<T> {
15    /// Maximum capacity of the reservoir
16    capacity: usize,
17    /// Total count of items seen
18    count: usize,
19    /// Current items in the reservoir
20    items: Vec<T>,
21    /// Random number generator
22    rng: StdRng,
23}
24
25impl<T> Reservoir<T> {
26    /// Create a new reservoir with the given capacity
27    pub fn new(capacity: usize, rng: StdRng) -> Self {
28        Self {
29            capacity,
30            count: 0,
31            items: Vec::with_capacity(capacity),
32            rng,
33        }
34    }
35
36    /// Consider an item for inclusion in the reservoir
37    pub fn consider(&mut self, item: T) {
38        self.count += 1;
39
40        if self.items.len() < self.capacity {
41            // Reservoir not full yet - add item
42            self.items.push(item);
43        } else {
44            // Reservoir full - randomly replace
45            let j = self.rng.random_range(0..self.count);
46            if j < self.capacity {
47                self.items[j] = item;
48            }
49        }
50    }
51
52    /// Get the number of items seen so far
53    pub fn total_seen(&self) -> usize {
54        self.count
55    }
56
57    /// Get the current size of the reservoir
58    pub fn len(&self) -> usize {
59        self.items.len()
60    }
61
62    /// Check if the reservoir is empty
63    pub fn is_empty(&self) -> bool {
64        self.items.is_empty()
65    }
66
67    /// Get the capacity of the reservoir
68    pub fn capacity(&self) -> usize {
69        self.capacity
70    }
71
72    /// Consume the reservoir and return the sampled items
73    pub fn into_items(self) -> Vec<T> {
74        self.items
75    }
76
77    /// Get a reference to the current items
78    pub fn items(&self) -> &[T] {
79        &self.items
80    }
81}