1pub fn shannon_entropy(kmer: u64, kmer_length: u8) -> f64 {
2 let mut counts = [0u8; 4];
3
4 for i in 0..kmer_length {
5 let nucleotide = (kmer >> (2 * i)) & 0b11;
6 counts[nucleotide as usize] += 1;
7 }
8
9 let length = kmer_length as f64;
10 let mut entropy = 0.0;
11
12 for count in counts.iter() {
13 if *count > 0 {
14 let p = (*count as f64) / length;
15 entropy -= p * p.log2();
16 }
17 }
18
19 entropy
20}
21
22pub fn passes_entropy_filter(kmer: u64, kmer_length: u8, min_entropy: f64) -> bool {
23 shannon_entropy(kmer, kmer_length) >= min_entropy
24}
25
26#[cfg(test)]
27mod tests {
28 use super::*;
29
30 #[test]
31 fn test_shannon_entropy() {
32 let homopolymer = 0; assert!(shannon_entropy(homopolymer, 8) < 1.0);
34
35 let mixed = 0b10011100; assert!(shannon_entropy(mixed, 4) > 1.5);
37 }
38
39 #[test]
40 fn test_passes_entropy_filter() {
41 let homopolymer = 0;
42 assert!(!passes_entropy_filter(homopolymer, 8, 1.5));
43
44 let mixed = 0b10011100;
45 assert!(passes_entropy_filter(mixed, 4, 1.5));
46 }
47
48 #[test]
49 fn test_entropy_edge_cases() {
50 assert_eq!(shannon_entropy(0, 1), 0.0);
51
52 let all_diff = 0b11100100; let max_entropy = shannon_entropy(all_diff, 4);
54 assert!((max_entropy - 2.0).abs() < 0.01);
55 }
56}