Skip to main content

genomicframe_core/
utils.rs

1//! Utility functions and helpers
2
3use crate::error::{Error, Result};
4
5/// Validate a DNA sequence (contains only A, C, G, T, N)
6pub fn validate_dna_sequence(seq: &str) -> Result<()> {
7    for (i, c) in seq.chars().enumerate() {
8        if !matches!(c.to_ascii_uppercase(), 'A' | 'C' | 'G' | 'T' | 'N') {
9            return Err(Error::InvalidInput(format!(
10                "Invalid DNA character '{}' at position {}",
11                c, i
12            )));
13        }
14    }
15    Ok(())
16}
17
/// Compute the reverse complement of a DNA sequence.
///
/// The input is walked back to front and each base is replaced by its
/// complement (`A` <-> `T`, `C` <-> `G`); `N` maps to `N`. Bases are matched
/// case-insensitively and recognised bases are always emitted in uppercase.
/// Any other character is copied through unchanged, keeping its original case.
pub fn reverse_complement(seq: &str) -> String {
    let mut complemented = String::with_capacity(seq.len());

    for base in seq.chars().rev() {
        let partner = match base {
            'A' | 'a' => 'T',
            'T' | 't' => 'A',
            'C' | 'c' => 'G',
            'G' | 'g' => 'C',
            'N' | 'n' => 'N',
            // Not a recognised base: pass it through untouched.
            other => other,
        };
        complemented.push(partner);
    }

    complemented
}
32
/// Calculate the GC content of a DNA sequence.
///
/// Returns the fraction of unambiguous bases (`A`, `C`, `G`, `T`, matched
/// case-insensitively) that are `G` or `C`. Every other character, including
/// `N`, is ignored and counts toward neither the numerator nor the
/// denominator.
///
/// Returns `0.0` when the sequence contains no `A`/`C`/`G`/`T` bases at all
/// (for example an empty string or a run of `N`s), which avoids a division
/// by zero.
pub fn gc_content(seq: &str) -> f64 {
    // Explicitly `usize`: untyped integer counters would default to `i32` and
    // overflow on sequences with more than `i32::MAX` counted bases.
    let mut gc_count: usize = 0;
    let mut at_count: usize = 0;

    // Only ASCII letters can match, and bytes of multi-byte UTF-8 characters
    // are never ASCII, so scanning bytes gives the same result as scanning
    // chars.
    for base in seq.bytes() {
        match base.to_ascii_uppercase() {
            b'G' | b'C' => gc_count += 1,
            b'A' | b'T' => at_count += 1,
            _ => {}
        }
    }

    let total = gc_count + at_count;
    if total == 0 {
        0.0
    } else {
        gc_count as f64 / total as f64
    }
}
55
#[cfg(test)]
mod tests {
    use super::*;

    /// Upper- and lowercase bases plus `N` are accepted; other letters are rejected.
    #[test]
    fn test_validate_dna() {
        for valid in ["ACGTACGT", "ACGTNNN", "acgtacgt"] {
            assert!(validate_dna_sequence(valid).is_ok());
        }
        assert!(validate_dna_sequence("ACGTXYZ").is_err());
    }

    /// Each input is paired with its expected reverse complement.
    #[test]
    fn test_reverse_complement() {
        let cases = [
            ("ACGT", "ACGT"),
            ("AAAA", "TTTT"),
            ("GCGC", "GCGC"),
            ("ATCG", "CGAT"),
        ];
        for (input, expected) in cases {
            assert_eq!(reverse_complement(input), expected);
        }
    }

    /// Exactly representable ratios are compared directly; the thirds are
    /// compared within a tolerance.
    #[test]
    fn test_gc_content() {
        let exact = [("AAAA", 0.0), ("GGGG", 1.0), ("ATCG", 0.5)];
        for (input, expected) in exact {
            assert_eq!(gc_content(input), expected);
        }

        let approximate = [("AAATCG", 0.333333), ("AACCGG", 0.666666)];
        for (input, expected) in approximate {
            assert!((gc_content(input) - expected).abs() < 0.001);
        }
    }
}