Skip to main content

ternary_sketch/
lib.rs

1//! # ternary-sketch
2//!
3//! Ternary sketch for approximate GPU workload analysis.
4//! Count-Min sketch with ternary counters.
5
6fn hash(item: &[u8], seed: u64) -> usize {
7    let mut h = seed;
8    for &b in item { h = h.wrapping_mul(31).wrapping_add(b as u64); }
9    h as usize
10}
11
12#[derive(Debug, Clone)]
13pub struct TernarySketch {
14    width: usize,
15    depth: usize,
16    table: Vec<Vec<i8>>, // depth x width, each cell is {-1, 0, +1}
17    total_updates: u64,
18}
19
20impl TernarySketch {
21    pub fn new(width: usize, depth: usize) -> Self {
22        Self { width, depth, table: vec![vec![0; width]; depth], total_updates: 0 }
23    }
24
25    /// Insert item: increment if possible within ternary range.
26    pub fn insert(&mut self, item: &[u8]) {
27        self.total_updates += 1;
28        for d in 0..self.depth {
29            let w = hash(item, d as u64 * 997) % self.width;
30            self.table[d][w] = match self.table[d][w] { -1 => 0, 0 => 1, _ => 1 };
31        }
32    }
33
34    /// Remove item: decrement.
35    pub fn remove(&mut self, item: &[u8]) {
36        for d in 0..self.depth {
37            let w = hash(item, d as u64 * 997) % self.width;
38            self.table[d][w] = match self.table[d][w] { 1 => 0, 0 => -1, _ => -1 };
39        }
40    }
41
42    /// Estimate frequency: minimum across all depth rows.
43    pub fn estimate(&self, item: &[u8]) -> i8 {
44        (0..self.depth).map(|d| {
45            let w = hash(item, d as u64 * 997) % self.width;
46            self.table[d][w]
47        }).min().unwrap_or(0)
48    }
49
50    /// Heavy hitters: items with estimate > threshold.
51    pub fn heavy_hitters(&self, candidates: &[Vec<u8>], threshold: i8) -> Vec<usize> {
52        candidates.iter().enumerate().filter(|(_, c)| self.estimate(c) > threshold).map(|(i, _)| i).collect()
53    }
54
55    /// Merge two sketches: take max per cell.
56    pub fn merge(&mut self, other: &TernarySketch) {
57        for d in 0..self.depth.min(other.depth) {
58            for w in 0..self.width.min(other.width) {
59                self.table[d][w] = self.table[d][w].max(other.table[d][w]);
60            }
61        }
62    }
63
64    pub fn fill_rate(&self) -> f64 {
65        let total = self.depth * self.width;
66        let filled = self.table.iter().flat_map(|r| r.iter()).filter(|&&v| v != 0).count();
67        filled as f64 / total as f64
68    }
69
70    pub fn total_updates(&self) -> u64 { self.total_updates }
71}
72
73#[cfg(test)]
74mod tests {
75    use super::*;
76
77    #[test]
78    fn test_insert_estimate() {
79        let mut sk = TernarySketch::new(64, 4);
80        sk.insert(b"kernel_a");
81        assert!(sk.estimate(b"kernel_a") > 0);
82    }
83
84    #[test]
85    fn test_missing() {
86        let sk = TernarySketch::new(64, 4);
87        assert_eq!(sk.estimate(b"absent"), 0);
88    }
89
90    #[test]
91    fn test_heavy_hitters() {
92        let mut sk = TernarySketch::new(64, 4);
93        sk.insert(b"hot_kernel");
94        sk.insert(b"hot_kernel");
95        let hh = sk.heavy_hitters(&[b"hot_kernel".to_vec(), b"cold".to_vec()], 0);
96        assert!(hh.contains(&0));
97    }
98
99    #[test]
100    fn test_merge() {
101        let mut sk1 = TernarySketch::new(32, 3);
102        let mut sk2 = TernarySketch::new(32, 3);
103        sk1.insert(b"x");
104        sk2.insert(b"y");
105        sk1.merge(&sk2);
106        assert!(sk1.estimate(b"x") > 0);
107    }
108
109    #[test]
110    fn test_fill_rate() {
111        let mut sk = TernarySketch::new(16, 2);
112        assert_eq!(sk.fill_rate(), 0.0);
113        sk.insert(b"a");
114        assert!(sk.fill_rate() > 0.0);
115    }
116
117    #[test]
118    fn test_remove() {
119        let mut sk = TernarySketch::new(64, 4);
120        sk.insert(b"temp");
121        sk.remove(b"temp");
122        assert!(sk.estimate(b"temp") <= 0);
123    }
124
125    #[test]
126    fn test_ternary_clamp() {
127        let mut sk = TernarySketch::new(16, 2);
128        for _ in 0..10 { sk.insert(b"repeat"); }
129        let est = sk.estimate(b"repeat");
130        assert!(est <= 1 && est >= -1); // always ternary
131    }
132}