// genomicframe-core 0.2.0
//
// High-performance genomics I/O and interoperability layer
// Documentation
//! Utility functions and helpers

use crate::error::{Error, Result};

/// Validate a DNA sequence (contains only A, C, G, T, N)
pub fn validate_dna_sequence(seq: &str) -> Result<()> {
    for (i, c) in seq.chars().enumerate() {
        if !matches!(c.to_ascii_uppercase(), 'A' | 'C' | 'G' | 'T' | 'N') {
            return Err(Error::InvalidInput(format!(
                "Invalid DNA character '{}' at position {}",
                c, i
            )));
        }
    }
    Ok(())
}

/// Build the reverse complement of a DNA sequence.
///
/// The input is walked back to front. `A`/`T` and `C`/`G` are swapped and
/// `N` maps to `N`; these bases are matched case-insensitively and always
/// emitted in uppercase. Any other character is copied through unchanged,
/// keeping its original case.
pub fn reverse_complement(seq: &str) -> String {
    // Output has the same byte length as the input: recognised bases are
    // single-byte ASCII and everything else is copied verbatim.
    let mut out = String::with_capacity(seq.len());

    for base in seq.chars().rev() {
        let paired = match base.to_ascii_uppercase() {
            'A' => 'T',
            'T' => 'A',
            'C' => 'G',
            'G' => 'C',
            'N' => 'N',
            _ => base,
        };
        out.push(paired);
    }

    out
}

/// Calculate the GC content of a DNA sequence.
///
/// Returns the fraction of `G`/`C` bases among the unambiguous bases
/// (`A`, `C`, `G`, `T`), matched case-insensitively. Every other character
/// (for example `N`) is ignored: it counts toward neither the numerator nor
/// the denominator.
///
/// Returns `0.0` when the sequence contains no `A`/`C`/`G`/`T` at all
/// (including the empty string), which avoids a division by zero.
pub fn gc_content(seq: &str) -> f64 {
    // Explicit 64-bit counters: an untyped integer literal would fall back to
    // `i32`, which overflows once more than ~2.1 billion bases are counted.
    let mut gc_count: u64 = 0;
    let mut total: u64 = 0;

    // Scanning bytes is sufficient: all counted bases are ASCII, and the
    // bytes of multi-byte UTF-8 characters are >= 0x80, so they never match.
    for b in seq.bytes() {
        match b.to_ascii_uppercase() {
            b'G' | b'C' => {
                gc_count += 1;
                total += 1;
            }
            b'A' | b'T' => total += 1,
            _ => {}
        }
    }

    if total == 0 {
        0.0
    } else {
        (gc_count as f64) / (total as f64)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Upper- and lowercase bases plus `N` are accepted; other letters are rejected.
    #[test]
    fn test_validate_dna() {
        for &accepted in &["ACGTACGT", "ACGTNNN", "acgtacgt"] {
            assert!(validate_dna_sequence(accepted).is_ok());
        }
        assert!(validate_dna_sequence("ACGTXYZ").is_err());
    }

    /// Each pair is `(input, expected reverse complement)`.
    #[test]
    fn test_reverse_complement() {
        let cases = [
            ("ACGT", "ACGT"),
            ("AAAA", "TTTT"),
            ("GCGC", "GCGC"),
            ("ATCG", "CGAT"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(reverse_complement(input), expected);
        }
    }

    /// Exactly representable ratios are compared exactly; thirds are compared
    /// within a tolerance.
    #[test]
    fn test_gc_content() {
        let exact = [("AAAA", 0.0), ("GGGG", 1.0), ("ATCG", 0.5)];
        for &(input, expected) in &exact {
            assert_eq!(gc_content(input), expected);
        }

        let approximate = [("AAATCG", 0.333333), ("AACCGG", 0.666666)];
        for &(input, expected) in &approximate {
            assert!((gc_content(input) - expected).abs() < 0.001);
        }
    }
}