ruvector_memopt/algorithms/
sketch.rs1use std::hash::{Hash, Hasher};
7use std::collections::hash_map::DefaultHasher;
8use chrono::Timelike;
9
10pub struct CountMinSketch {
13 counters: Vec<Vec<u64>>,
15 width: usize,
17 depth: usize,
19 total_count: u64,
21}
22
23impl CountMinSketch {
24 pub fn new(epsilon: f64, delta: f64) -> Self {
28 let width = (std::f64::consts::E / epsilon).ceil() as usize;
29 let depth = (1.0 / delta).ln().ceil() as usize;
30
31 Self::with_dimensions(width.max(64), depth.max(4))
32 }
33
34 pub fn with_dimensions(width: usize, depth: usize) -> Self {
36 Self {
37 counters: vec![vec![0u64; width]; depth],
38 width,
39 depth,
40 total_count: 0,
41 }
42 }
43
44 fn hash(&self, item: u64, row: usize) -> usize {
46 let mut hasher = DefaultHasher::new();
47 item.hash(&mut hasher);
48 (row as u64).hash(&mut hasher);
49 (hasher.finish() as usize) % self.width
50 }
51
52 pub fn add(&mut self, item: u64) {
54 self.add_count(item, 1);
55 }
56
57 pub fn add_count(&mut self, item: u64, count: u64) {
59 for row in 0..self.depth {
60 let col = self.hash(item, row);
61 self.counters[row][col] = self.counters[row][col].saturating_add(count);
62 }
63 self.total_count = self.total_count.saturating_add(count);
64 }
65
66 pub fn estimate(&self, item: u64) -> u64 {
68 let mut min_count = u64::MAX;
69 for row in 0..self.depth {
70 let col = self.hash(item, row);
71 min_count = min_count.min(self.counters[row][col]);
72 }
73 min_count
74 }
75
76 pub fn frequency(&self, item: u64) -> f64 {
78 if self.total_count == 0 {
79 return 0.0;
80 }
81 self.estimate(item) as f64 / self.total_count as f64
82 }
83
84 pub fn is_frequent(&self, item: u64, threshold: f64) -> bool {
86 self.frequency(item) >= threshold
87 }
88
89 pub fn total_count(&self) -> u64 {
91 self.total_count
92 }
93
94 pub fn memory_usage(&self) -> usize {
96 self.width * self.depth * std::mem::size_of::<u64>()
97 }
98
99 pub fn clear(&mut self) {
101 for row in &mut self.counters {
102 for counter in row {
103 *counter = 0;
104 }
105 }
106 self.total_count = 0;
107 }
108
109 pub fn merge(&mut self, other: &CountMinSketch) {
111 if self.width != other.width || self.depth != other.depth {
112 return;
113 }
114 for row in 0..self.depth {
115 for col in 0..self.width {
116 self.counters[row][col] =
117 self.counters[row][col].saturating_add(other.counters[row][col]);
118 }
119 }
120 self.total_count = self.total_count.saturating_add(other.total_count);
121 }
122
123 pub fn stats(&self) -> SketchStats {
125 let non_zero: usize = self.counters
126 .iter()
127 .flat_map(|row| row.iter())
128 .filter(|&&c| c > 0)
129 .count();
130
131 SketchStats {
132 width: self.width,
133 depth: self.depth,
134 total_count: self.total_count,
135 memory_bytes: self.memory_usage(),
136 fill_ratio: non_zero as f64 / (self.width * self.depth) as f64,
137 }
138 }
139}
140
141impl Default for CountMinSketch {
142 fn default() -> Self {
143 Self::new(0.01, 0.001) }
145}
146
147#[derive(Debug, Clone)]
148pub struct SketchStats {
149 pub width: usize,
150 pub depth: usize,
151 pub total_count: u64,
152 pub memory_bytes: usize,
153 pub fill_ratio: f64,
154}
155
156pub struct PressureTracker {
158 load_sketch: CountMinSketch,
160 process_sketch: CountMinSketch,
162 time_sketch: CountMinSketch,
164 pressure_threshold: u32,
166}
167
168impl PressureTracker {
169 pub fn new() -> Self {
170 Self {
171 load_sketch: CountMinSketch::new(0.01, 0.001),
172 process_sketch: CountMinSketch::new(0.01, 0.001),
173 time_sketch: CountMinSketch::with_dimensions(24, 4), pressure_threshold: 80,
175 }
176 }
177
178 pub fn record_pressure(&mut self, load_percent: u32, top_processes: &[u32]) {
180 self.load_sketch.add(load_percent as u64);
182
183 for &pid in top_processes {
185 self.process_sketch.add(pid as u64);
186 }
187
188 let hour = chrono::Local::now().hour();
190 self.time_sketch.add(hour as u64);
191 }
192
193 pub fn is_common_load(&self, load_percent: u32) -> bool {
195 self.load_sketch.is_frequent(load_percent as u64, 0.05)
196 }
197
198 pub fn process_pressure_frequency(&self, pid: u32) -> f64 {
200 self.process_sketch.frequency(pid as u64)
201 }
202
203 pub fn get_peak_hours(&self) -> Vec<(u32, u64)> {
205 let mut hours: Vec<(u32, u64)> = (0..24)
206 .map(|h| (h, self.time_sketch.estimate(h as u64)))
207 .collect();
208 hours.sort_by(|a, b| b.1.cmp(&a.1));
209 hours.into_iter().take(5).collect()
210 }
211
212 pub fn stats(&self) -> PressureTrackerStats {
214 PressureTrackerStats {
215 load_stats: self.load_sketch.stats(),
216 process_stats: self.process_sketch.stats(),
217 time_stats: self.time_sketch.stats(),
218 }
219 }
220}
221
222impl Default for PressureTracker {
223 fn default() -> Self {
224 Self::new()
225 }
226}
227
228#[derive(Debug, Clone)]
229pub struct PressureTrackerStats {
230 pub load_stats: SketchStats,
231 pub process_stats: SketchStats,
232 pub time_stats: SketchStats,
233}
234
235#[cfg(test)]
236mod tests {
237 use super::*;
238
239 #[test]
240 fn test_count_min_sketch() {
241 let mut sketch = CountMinSketch::new(0.01, 0.001);
242
243 for _ in 0..1000 {
245 sketch.add(42);
246 }
247 for _ in 0..500 {
248 sketch.add(100);
249 }
250 sketch.add(999);
251
252 assert!(sketch.estimate(42) >= 1000);
254 assert!(sketch.estimate(100) >= 500);
255 assert!(sketch.estimate(999) >= 1);
256
257 assert!(sketch.is_frequent(42, 0.5));
259 assert!(!sketch.is_frequent(999, 0.1));
260 }
261
262 #[test]
263 fn test_pressure_tracker() {
264 let mut tracker = PressureTracker::new();
265
266 tracker.record_pressure(85, &[1234, 5678]);
268 tracker.record_pressure(90, &[1234]);
269 tracker.record_pressure(75, &[9999]);
270
271 assert!(tracker.process_pressure_frequency(1234) > tracker.process_pressure_frequency(9999));
273 }
274}