Skip to main content

rustalign_io/
lib.rs

1//! RustAlign I/O - File parsing and formatting
2//!
3//! This crate handles FASTA, FASTQ, and SAM file formats.
4
5#![warn(missing_docs)]
6#![warn(clippy::all)]
7
8pub mod fasta;
9pub mod fastq;
10pub mod mmap;
11pub mod sam;
12
13#[cfg(test)]
14mod proptest;
15
16pub use fasta::{FastaReader, FastaRecord};
17pub use fastq::{FastqReader, FastqRecord};
18pub use mmap::{MmapFile, MmapFileBuilder};
19pub use sam::{SamConfig, SamOpt, SamRecord, SamWriter};
20
21// Re-export Nuc for tests and external use
22pub use rustalign_common::Nuc;
23
24use rustalign_common::AlignResult;
25
26/// Parse a DNA sequence from a string
27pub fn parse_dna(s: &[u8]) -> AlignResult<Vec<Nuc>> {
28    s.iter()
29        .map(|&b| Nuc::from_ascii(b).ok_or(rustalign_common::AlignError::InvalidNucleotide(b)))
30        .collect()
31}
32
33/// Parse a quality string from Phred+33 encoding
34pub fn parse_qual(s: &[u8]) -> AlignResult<Vec<u8>> {
35    Ok(s.iter().map(|&b| b.saturating_sub(33)).collect())
36}
37
/// Encode raw Phred quality scores as Phred+33 ASCII bytes.
///
/// Each score is offset by 33. Scores above 93 are clamped to 93 so that
/// every output byte stays within the printable range `'!'..='~'`
/// (33..=126) used by FASTQ/SAM quality strings; without the clamp, large
/// scores would be emitted as DEL or non-ASCII bytes.
///
/// An empty input yields an empty vector.
pub fn encode_qual(qual: &[u8]) -> Vec<u8> {
    // ASCII offset of the Phred+33 encoding.
    const PHRED_OFFSET: u8 = 33;
    // Largest score whose encoded byte is still printable ASCII ('~').
    const MAX_PHRED: u8 = b'~' - PHRED_OFFSET;

    qual.iter()
        .map(|&q| q.min(MAX_PHRED) + PHRED_OFFSET)
        .collect()
}
42
#[cfg(test)]
mod tests {
    use super::*;

    /// Unambiguous bases and `N` map to their matching `Nuc` variants.
    #[test]
    fn test_parse_dna() {
        let parsed = parse_dna(b"ACGTN").unwrap();
        assert_eq!(parsed.len(), 5);
        for (idx, expected) in [(0usize, Nuc::A), (4, Nuc::N)] {
            assert_eq!(parsed[idx], expected);
        }
    }

    /// IUPAC ambiguity codes (here `R` and `Y`) are expected to collapse to `N`.
    #[test]
    fn test_parse_dna_ambiguous() {
        let parsed = parse_dna(b"ACRYGT").unwrap();
        assert_eq!(parsed.len(), 6);
        let checks = [
            (0usize, Nuc::A),
            (2, Nuc::N), // R -> N
            (3, Nuc::N), // Y -> N
            (4, Nuc::G),
        ];
        for (idx, expected) in checks {
            assert_eq!(parsed[idx], expected);
        }
    }

    /// Bytes that are not nucleotide codes (punctuation, whitespace) must be rejected.
    #[test]
    fn test_parse_dna_invalid() {
        let rejected: [&[u8]; 2] = [
            b"AC!GT",
            b"AC GT", // space
        ];
        for input in rejected {
            assert!(parse_dna(input).is_err());
        }
    }

    /// Encoding then decoding quality scores returns the original values.
    #[test]
    fn test_qual_roundtrip() {
        let original = vec![30u8, 35, 40];
        let ascii = encode_qual(&original);
        let restored = parse_qual(&ascii).unwrap();
        assert_eq!(original, restored);
    }
}