sdivi_patterns/
entropy.rs1use std::collections::BTreeMap;
4
5use crate::catalog::PatternStats;
6use crate::fingerprint::PatternFingerprint;
7
8pub fn compute_entropy(stats: &BTreeMap<PatternFingerprint, PatternStats>) -> f64 {
41 let distinct = stats.len();
42 if distinct <= 1 {
43 return 0.0;
44 }
45
46 let total: u32 = stats.values().map(|s| s.count).sum();
47 if total == 0 {
48 return 0.0;
49 }
50
51 let total_f = f64::from(total);
52 let raw: f64 = stats
53 .values()
54 .map(|s| {
55 let p = f64::from(s.count) / total_f;
56 if p > 0.0 {
57 -p * p.log2()
58 } else {
59 0.0
60 }
61 })
62 .sum();
63
64 let max = (distinct as f64).log2();
65 if max <= 0.0 {
66 return 0.0;
67 }
68
69 (raw / max).clamp(0.0, 1.0)
70}
71
72#[cfg(test)]
73mod tests {
74 use super::*;
75 use crate::fingerprint::fingerprint_node_kind;
76
77 fn make_stats(count: u32) -> PatternStats {
78 PatternStats {
79 count,
80 locations: vec![],
81 }
82 }
83
84 #[test]
85 fn empty_stats_returns_zero() {
86 let stats: BTreeMap<PatternFingerprint, PatternStats> = BTreeMap::new();
87 assert_eq!(compute_entropy(&stats), 0.0);
88 }
89
90 #[test]
91 fn single_shape_returns_zero() {
92 let mut stats = BTreeMap::new();
93 stats.insert(fingerprint_node_kind("try_expression"), make_stats(10));
94 assert_eq!(compute_entropy(&stats), 0.0);
95 }
96
97 #[test]
98 fn equal_distribution_two_shapes_returns_one() {
99 let mut stats = BTreeMap::new();
100 stats.insert(fingerprint_node_kind("try_expression"), make_stats(5));
101 stats.insert(fingerprint_node_kind("match_expression"), make_stats(5));
102 let h = compute_entropy(&stats);
103 assert!((h - 1.0).abs() < 1e-10);
104 }
105
106 #[test]
107 fn unequal_distribution_is_between_zero_and_one() {
108 let mut stats = BTreeMap::new();
109 stats.insert(fingerprint_node_kind("try_expression"), make_stats(9));
110 stats.insert(fingerprint_node_kind("match_expression"), make_stats(1));
111 let h = compute_entropy(&stats);
112 assert!(h > 0.0 && h < 1.0);
113 }
114}