Skip to main content

jam_rs/
core_utils.rs

1pub fn shannon_entropy(kmer: u64, kmer_length: u8) -> f64 {
2    let mut counts = [0u8; 4];
3
4    for i in 0..kmer_length {
5        let nucleotide = (kmer >> (2 * i)) & 0b11;
6        counts[nucleotide as usize] += 1;
7    }
8
9    let length = kmer_length as f64;
10    let mut entropy = 0.0;
11
12    for count in counts.iter() {
13        if *count > 0 {
14            let p = (*count as f64) / length;
15            entropy -= p * p.log2();
16        }
17    }
18
19    entropy
20}
21
22pub fn passes_entropy_filter(kmer: u64, kmer_length: u8, min_entropy: f64) -> bool {
23    shannon_entropy(kmer, kmer_length) >= min_entropy
24}
25
26#[cfg(test)]
27mod tests {
28    use super::*;
29
30    #[test]
31    fn test_shannon_entropy() {
32        let homopolymer = 0; // All A's
33        assert!(shannon_entropy(homopolymer, 8) < 1.0);
34
35        let mixed = 0b10011100; // ATGC pattern
36        assert!(shannon_entropy(mixed, 4) > 1.5);
37    }
38
39    #[test]
40    fn test_passes_entropy_filter() {
41        let homopolymer = 0;
42        assert!(!passes_entropy_filter(homopolymer, 8, 1.5));
43
44        let mixed = 0b10011100;
45        assert!(passes_entropy_filter(mixed, 4, 1.5));
46    }
47
48    #[test]
49    fn test_entropy_edge_cases() {
50        assert_eq!(shannon_entropy(0, 1), 0.0);
51
52        let all_diff = 0b11100100; // TCGA
53        let max_entropy = shannon_entropy(all_diff, 4);
54        assert!((max_entropy - 2.0).abs() < 0.01);
55    }
56}