nodedb_types/approx/
count_min.rs1#[derive(Debug)]
14pub struct CountMinSketch {
15 table: Vec<Vec<u64>>,
16 width: usize,
17 depth: usize,
18 total: u64,
19 seeds: Vec<u64>,
20}
21
22impl CountMinSketch {
23 pub fn new() -> Self {
27 Self::with_params(1024, 4)
28 }
29
30 pub fn with_params(width: usize, depth: usize) -> Self {
35 let width = width.max(16);
36 let depth = depth.max(2);
37 let seeds: Vec<u64> = (0..depth as u64)
38 .map(|i| 0x517cc1b727220a95u64.wrapping_add(i.wrapping_mul(0x6c62272e07bb0142)))
39 .collect();
40 Self {
41 table: vec![vec![0u64; width]; depth],
42 width,
43 depth,
44 total: 0,
45 seeds,
46 }
47 }
48
49 pub fn add(&mut self, item: u64) {
51 self.add_count(item, 1);
52 }
53
54 pub fn add_count(&mut self, item: u64, count: u64) {
56 self.total += count;
57 for d in 0..self.depth {
58 let idx = self.hash(d, item);
59 self.table[d][idx] += count;
60 }
61 }
62
63 pub fn add_batch(&mut self, items: &[u64]) {
65 for &item in items {
66 self.add(item);
67 }
68 }
69
70 pub fn estimate(&self, item: u64) -> u64 {
75 let mut min_count = u64::MAX;
76 for d in 0..self.depth {
77 let idx = self.hash(d, item);
78 min_count = min_count.min(self.table[d][idx]);
79 }
80 min_count
81 }
82
83 pub fn total(&self) -> u64 {
85 self.total
86 }
87
88 pub fn merge(&mut self, other: &CountMinSketch) {
92 debug_assert_eq!(self.width, other.width);
93 debug_assert_eq!(self.depth, other.depth);
94 self.total += other.total;
95 for d in 0..self.depth {
96 for w in 0..self.width {
97 self.table[d][w] += other.table[d][w];
98 }
99 }
100 }
101
102 pub fn memory_bytes(&self) -> usize {
104 self.width * self.depth * std::mem::size_of::<u64>()
105 }
106
107 pub fn table_bytes(&self) -> Vec<u8> {
109 let mut bytes = Vec::with_capacity(self.width * self.depth * 8);
110 for row in &self.table {
111 for &val in row {
112 bytes.extend_from_slice(&val.to_le_bytes());
113 }
114 }
115 bytes
116 }
117
118 pub fn from_table_bytes(data: &[u8], width: usize, depth: usize) -> Self {
120 let mut sketch = Self::with_params(width, depth);
121 let mut offset = 0;
122 for d in 0..depth {
123 for w in 0..width {
124 if offset + 8 <= data.len() {
125 sketch.table[d][w] =
126 u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap_or([0; 8]));
127 sketch.total += sketch.table[d][w];
128 offset += 8;
129 }
130 }
131 }
132 sketch.total = sketch.table[0].iter().sum();
135 sketch
136 }
137
138 #[inline]
139 fn hash(&self, depth_idx: usize, item: u64) -> usize {
140 let h = splitmix64(item ^ self.seeds[depth_idx]);
141 (h as usize) % self.width
142 }
143}
144
145impl Default for CountMinSketch {
146 fn default() -> Self {
147 Self::new()
148 }
149}
150
151fn splitmix64(mut x: u64) -> u64 {
153 x = x.wrapping_add(0x9e3779b97f4a7c15);
154 x = (x ^ (x >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
155 x = (x ^ (x >> 27)).wrapping_mul(0x94d049bb133111eb);
156 x ^ (x >> 31)
157}
158
159#[cfg(test)]
160mod tests {
161 use super::*;
162
163 #[test]
164 fn empty_sketch() {
165 let cms = CountMinSketch::new();
166 assert_eq!(cms.estimate(42), 0);
167 assert_eq!(cms.total(), 0);
168 }
169
170 #[test]
171 fn exact_for_single_item() {
172 let mut cms = CountMinSketch::new();
173 for _ in 0..1000 {
174 cms.add(42);
175 }
176 assert_eq!(cms.estimate(42), 1000);
177 }
178
179 #[test]
180 fn overestimate_bounded() {
181 let mut cms = CountMinSketch::new();
182 for i in 0..100_000u64 {
184 cms.add(i % 1000);
185 }
186 for i in 0..1000u64 {
189 let est = cms.estimate(i);
190 assert!(est >= 100, "item {i}: expected ≥100, got {est}");
191 }
192 for i in 0..1000u64 {
194 let est = cms.estimate(i);
195 assert!(est <= 400, "item {i}: expected ≤400, got {est}");
196 }
197 }
198
199 #[test]
200 fn absent_item_bounded() {
201 let mut cms = CountMinSketch::new();
202 for i in 0..10_000u64 {
203 cms.add(i);
204 }
205 let est = cms.estimate(99999);
207 assert!(est <= 50, "absent item: expected ≤50, got {est}");
209 }
210
211 #[test]
212 fn merge() {
213 let mut a = CountMinSketch::new();
214 let mut b = CountMinSketch::new();
215 for _ in 0..500 {
216 a.add(1);
217 }
218 for _ in 0..300 {
219 b.add(1);
220 }
221 for _ in 0..200 {
222 b.add(2);
223 }
224 a.merge(&b);
225 assert_eq!(a.estimate(1), 800);
226 assert_eq!(a.total(), 1000);
227 }
228
229 #[test]
230 fn batch_add() {
231 let mut cms = CountMinSketch::new();
232 cms.add_batch(&[1, 1, 2, 3, 3, 3]);
233 assert_eq!(cms.estimate(1), 2);
234 assert_eq!(cms.estimate(3), 3);
235 assert_eq!(cms.total(), 6);
236 }
237
238 #[test]
239 fn memory() {
240 let cms = CountMinSketch::new();
241 assert_eq!(cms.memory_bytes(), 1024 * 4 * 8); }
243}