ruvector_memopt/algorithms/
sketch.rs

1//! Count-Min Sketch for sublinear frequency estimation
2//!
3//! Provides O(1) space approximate frequency counting for memory patterns.
4//! Used to detect frequent memory pressure events without storing all history.
5
6use std::hash::{Hash, Hasher};
7use std::collections::hash_map::DefaultHasher;
8use chrono::Timelike;
9
10/// Count-Min Sketch for approximate frequency counting
11/// Space: O(width * depth), Query: O(depth), Update: O(depth)
12pub struct CountMinSketch {
13    /// 2D array of counters
14    counters: Vec<Vec<u64>>,
15    /// Width of each row
16    width: usize,
17    /// Number of hash functions (rows)
18    depth: usize,
19    /// Total items added
20    total_count: u64,
21}
22
23impl CountMinSketch {
24    /// Create a new sketch with given error bounds
25    /// - epsilon: error factor (smaller = more accurate, more space)
26    /// - delta: probability of exceeding error bound
27    pub fn new(epsilon: f64, delta: f64) -> Self {
28        let width = (std::f64::consts::E / epsilon).ceil() as usize;
29        let depth = (1.0 / delta).ln().ceil() as usize;
30
31        Self::with_dimensions(width.max(64), depth.max(4))
32    }
33
34    /// Create with specific dimensions
35    pub fn with_dimensions(width: usize, depth: usize) -> Self {
36        Self {
37            counters: vec![vec![0u64; width]; depth],
38            width,
39            depth,
40            total_count: 0,
41        }
42    }
43
44    /// Hash function for row i
45    fn hash(&self, item: u64, row: usize) -> usize {
46        let mut hasher = DefaultHasher::new();
47        item.hash(&mut hasher);
48        (row as u64).hash(&mut hasher);
49        (hasher.finish() as usize) % self.width
50    }
51
52    /// Add an item to the sketch
53    pub fn add(&mut self, item: u64) {
54        self.add_count(item, 1);
55    }
56
57    /// Add an item with a specific count
58    pub fn add_count(&mut self, item: u64, count: u64) {
59        for row in 0..self.depth {
60            let col = self.hash(item, row);
61            self.counters[row][col] = self.counters[row][col].saturating_add(count);
62        }
63        self.total_count = self.total_count.saturating_add(count);
64    }
65
66    /// Query the estimated count of an item
67    pub fn estimate(&self, item: u64) -> u64 {
68        let mut min_count = u64::MAX;
69        for row in 0..self.depth {
70            let col = self.hash(item, row);
71            min_count = min_count.min(self.counters[row][col]);
72        }
73        min_count
74    }
75
76    /// Query the estimated frequency (count / total)
77    pub fn frequency(&self, item: u64) -> f64 {
78        if self.total_count == 0 {
79            return 0.0;
80        }
81        self.estimate(item) as f64 / self.total_count as f64
82    }
83
84    /// Check if an item is likely frequent (above threshold)
85    pub fn is_frequent(&self, item: u64, threshold: f64) -> bool {
86        self.frequency(item) >= threshold
87    }
88
89    /// Get total items added
90    pub fn total_count(&self) -> u64 {
91        self.total_count
92    }
93
94    /// Memory usage in bytes
95    pub fn memory_usage(&self) -> usize {
96        self.width * self.depth * std::mem::size_of::<u64>()
97    }
98
99    /// Reset all counters
100    pub fn clear(&mut self) {
101        for row in &mut self.counters {
102            for counter in row {
103                *counter = 0;
104            }
105        }
106        self.total_count = 0;
107    }
108
109    /// Merge another sketch into this one
110    pub fn merge(&mut self, other: &CountMinSketch) {
111        if self.width != other.width || self.depth != other.depth {
112            return;
113        }
114        for row in 0..self.depth {
115            for col in 0..self.width {
116                self.counters[row][col] =
117                    self.counters[row][col].saturating_add(other.counters[row][col]);
118            }
119        }
120        self.total_count = self.total_count.saturating_add(other.total_count);
121    }
122
123    /// Statistics
124    pub fn stats(&self) -> SketchStats {
125        let non_zero: usize = self.counters
126            .iter()
127            .flat_map(|row| row.iter())
128            .filter(|&&c| c > 0)
129            .count();
130
131        SketchStats {
132            width: self.width,
133            depth: self.depth,
134            total_count: self.total_count,
135            memory_bytes: self.memory_usage(),
136            fill_ratio: non_zero as f64 / (self.width * self.depth) as f64,
137        }
138    }
139}
140
141impl Default for CountMinSketch {
142    fn default() -> Self {
143        Self::new(0.01, 0.001) // 1% error with 99.9% probability
144    }
145}
146
147#[derive(Debug, Clone)]
148pub struct SketchStats {
149    pub width: usize,
150    pub depth: usize,
151    pub total_count: u64,
152    pub memory_bytes: usize,
153    pub fill_ratio: f64,
154}
155
156/// Memory pressure pattern tracker using Count-Min Sketch
157pub struct PressureTracker {
158    /// Sketch for memory load percentages
159    load_sketch: CountMinSketch,
160    /// Sketch for process PIDs causing pressure
161    process_sketch: CountMinSketch,
162    /// Sketch for time-of-day patterns (hour buckets)
163    time_sketch: CountMinSketch,
164    /// High pressure threshold
165    pressure_threshold: u32,
166}
167
168impl PressureTracker {
169    pub fn new() -> Self {
170        Self {
171            load_sketch: CountMinSketch::new(0.01, 0.001),
172            process_sketch: CountMinSketch::new(0.01, 0.001),
173            time_sketch: CountMinSketch::with_dimensions(24, 4), // 24 hours
174            pressure_threshold: 80,
175        }
176    }
177
178    /// Record a memory pressure event
179    pub fn record_pressure(&mut self, load_percent: u32, top_processes: &[u32]) {
180        // Record load level
181        self.load_sketch.add(load_percent as u64);
182
183        // Record processes causing pressure
184        for &pid in top_processes {
185            self.process_sketch.add(pid as u64);
186        }
187
188        // Record time pattern
189        let hour = chrono::Local::now().hour();
190        self.time_sketch.add(hour as u64);
191    }
192
193    /// Check if a load level is frequently seen
194    pub fn is_common_load(&self, load_percent: u32) -> bool {
195        self.load_sketch.is_frequent(load_percent as u64, 0.05)
196    }
197
198    /// Get estimated frequency of a process causing pressure
199    pub fn process_pressure_frequency(&self, pid: u32) -> f64 {
200        self.process_sketch.frequency(pid as u64)
201    }
202
203    /// Get peak pressure hours
204    pub fn get_peak_hours(&self) -> Vec<(u32, u64)> {
205        let mut hours: Vec<(u32, u64)> = (0..24)
206            .map(|h| (h, self.time_sketch.estimate(h as u64)))
207            .collect();
208        hours.sort_by(|a, b| b.1.cmp(&a.1));
209        hours.into_iter().take(5).collect()
210    }
211
212    /// Statistics
213    pub fn stats(&self) -> PressureTrackerStats {
214        PressureTrackerStats {
215            load_stats: self.load_sketch.stats(),
216            process_stats: self.process_sketch.stats(),
217            time_stats: self.time_sketch.stats(),
218        }
219    }
220}
221
222impl Default for PressureTracker {
223    fn default() -> Self {
224        Self::new()
225    }
226}
227
228#[derive(Debug, Clone)]
229pub struct PressureTrackerStats {
230    pub load_stats: SketchStats,
231    pub process_stats: SketchStats,
232    pub time_stats: SketchStats,
233}
234
235#[cfg(test)]
236mod tests {
237    use super::*;
238
239    #[test]
240    fn test_count_min_sketch() {
241        let mut sketch = CountMinSketch::new(0.01, 0.001);
242
243        // Add items
244        for _ in 0..1000 {
245            sketch.add(42);
246        }
247        for _ in 0..500 {
248            sketch.add(100);
249        }
250        sketch.add(999);
251
252        // Estimates should be >= actual counts
253        assert!(sketch.estimate(42) >= 1000);
254        assert!(sketch.estimate(100) >= 500);
255        assert!(sketch.estimate(999) >= 1);
256
257        // Frequency checks
258        assert!(sketch.is_frequent(42, 0.5));
259        assert!(!sketch.is_frequent(999, 0.1));
260    }
261
262    #[test]
263    fn test_pressure_tracker() {
264        let mut tracker = PressureTracker::new();
265
266        // Record some pressure events
267        tracker.record_pressure(85, &[1234, 5678]);
268        tracker.record_pressure(90, &[1234]);
269        tracker.record_pressure(75, &[9999]);
270
271        // Process 1234 should be more frequent
272        assert!(tracker.process_pressure_frequency(1234) > tracker.process_pressure_frequency(9999));
273    }
274}