nodedb_types/approx/
spacesaving.rs1use std::collections::HashMap;
4
5#[derive(Debug)]
11pub struct SpaceSaving {
12 items: HashMap<u64, (u64, u64)>,
13 max_items: usize,
14}
15
16impl SpaceSaving {
17 pub fn new(k: usize) -> Self {
18 Self {
19 items: HashMap::with_capacity(k + 1),
20 max_items: k.max(1),
21 }
22 }
23
24 pub fn add(&mut self, item: u64) {
25 if let Some(entry) = self.items.get_mut(&item) {
26 entry.0 += 1;
27 return;
28 }
29
30 if self.items.len() < self.max_items {
31 self.items.insert(item, (1, 0));
32 } else {
33 let Some((&min_key, &(min_count, _))) =
34 self.items.iter().min_by_key(|(_, (count, _))| *count)
35 else {
36 return;
37 };
38 self.items.remove(&min_key);
39 self.items.insert(item, (min_count + 1, min_count));
40 }
41 }
42
43 pub fn add_batch(&mut self, items: &[u64]) {
44 for &item in items {
45 self.add(item);
46 }
47 }
48
49 pub fn top_k(&self) -> Vec<(u64, u64, u64)> {
53 let mut result: Vec<(u64, u64, u64)> = self
54 .items
55 .iter()
56 .map(|(&item, &(count, error))| (item, count, error))
57 .collect();
58 result.sort_by(|a, b| b.1.cmp(&a.1));
59 result
60 }
61
62 pub fn merge(&mut self, other: &SpaceSaving) {
63 for (&item, &(count, error)) in &other.items {
64 let entry = self.items.entry(item).or_insert((0, 0));
65 entry.0 += count;
66 entry.1 += error;
67 }
68
69 while self.items.len() > self.max_items {
70 let Some((&min_key, _)) = self.items.iter().min_by_key(|(_, (count, _))| *count) else {
71 break;
72 };
73 self.items.remove(&min_key);
74 }
75 }
76}
77
78#[cfg(test)]
79mod tests {
80 use super::*;
81
82 #[test]
83 fn topk_basic() {
84 let mut ss = SpaceSaving::new(3);
85 for _ in 0..100 {
86 ss.add(1);
87 }
88 for _ in 0..50 {
89 ss.add(2);
90 }
91 for _ in 0..30 {
92 ss.add(3);
93 }
94 for _ in 0..10 {
95 ss.add(4);
96 }
97 let top = ss.top_k();
98 assert_eq!(top[0].0, 1);
99 assert_eq!(top[0].1, 100);
100 }
101
102 #[test]
103 fn topk_merge() {
104 let mut a = SpaceSaving::new(5);
105 let mut b = SpaceSaving::new(5);
106 for _ in 0..100 {
107 a.add(1);
108 }
109 for _ in 0..80 {
110 b.add(1);
111 }
112 for _ in 0..50 {
113 b.add(2);
114 }
115 a.merge(&b);
116 let top = a.top_k();
117 assert_eq!(top[0].0, 1);
118 assert_eq!(top[0].1, 180);
119 }
120
121 #[test]
122 fn topk_eviction() {
123 let mut ss = SpaceSaving::new(3);
124 for i in 0..10u64 {
125 for _ in 0..(10 - i) {
126 ss.add(i);
127 }
128 }
129 let top = ss.top_k();
130 assert_eq!(top.len(), 3);
131 assert!(top[0].1 >= top[1].1);
132 assert!(top[1].1 >= top[2].1);
133 }
134}