// rustalign_io/fastq.rs

//! FASTQ file parsing.
//!
//! Provides [`FastqRecord`] and the line-oriented [`FastqReader`].
2
3use crate::{parse_dna, parse_qual};
4use rustalign_common::{AlignError, AlignResult, Nuc};
5use std::fs::File;
6use std::io::{BufRead, BufReader};
7use std::path::Path;
8
9/// A FASTQ record
10#[derive(Debug, Clone)]
11pub struct FastqRecord {
12    /// Read ID (header line without @)
13    pub id: String,
14
15    /// Sequence data
16    pub seq: Vec<Nuc>,
17
18    /// Quality scores (Phred+33 decoded)
19    pub qual: Vec<u8>,
20}
21
22impl FastqRecord {
23    /// Create a new FASTQ record
24    pub fn new(id: String, seq: Vec<Nuc>, qual: Vec<u8>) -> Self {
25        Self { id, seq, qual }
26    }
27
28    /// Get the read length
29    pub fn len(&self) -> usize {
30        self.seq.len()
31    }
32
33    /// Check if read is empty
34    pub fn is_empty(&self) -> bool {
35        self.seq.is_empty()
36    }
37
38    /// Validate that sequence and quality lengths match
39    pub fn validate(&self) -> AlignResult<()> {
40        if self.seq.len() != self.qual.len() {
41            return Err(AlignError::InvalidFormat(format!(
42                "Sequence length {} != quality length {}",
43                self.seq.len(),
44                self.qual.len()
45            )));
46        }
47        Ok(())
48    }
49}
50
/// FASTQ file reader.
///
/// Wraps a line source and hands out records one at a time. The `BufRead`
/// requirement is declared on the `impl` blocks that actually read from
/// `reader`, not on the struct itself, so the type can be named and stored
/// without repeating the bound.
pub struct FastqReader<R> {
    /// Underlying buffered input the records are read from.
    reader: R,
}
55
56impl FastqReader<BufReader<File>> {
57    /// Open a FASTQ file
58    pub fn from_path<P: AsRef<Path>>(path: P) -> AlignResult<Self> {
59        let file = File::open(path)?;
60        Ok(Self::new(BufReader::new(file)))
61    }
62}
63
64impl<R: BufRead> FastqReader<R> {
65    /// Create a new FASTQ reader
66    pub fn new(reader: R) -> Self {
67        Self { reader }
68    }
69
70    /// Read the next record
71    #[allow(clippy::should_implement_trait)]
72    pub fn next(&mut self) -> AlignResult<Option<FastqRecord>> {
73        let mut header = String::new();
74        let mut seq_line = String::new();
75        let mut plus_line = String::new();
76        let mut qual_line = String::new();
77
78        // Read header line
79        header.clear();
80        if self.reader.read_line(&mut header)? == 0 {
81            return Ok(None);
82        }
83
84        let header = header.trim();
85        if !header.starts_with('@') {
86            return Err(AlignError::InvalidFormat(
87                "FASTQ header must start with @".into(),
88            ));
89        }
90        let id = header[1..].to_string();
91
92        // Read sequence line
93        seq_line.clear();
94        if self.reader.read_line(&mut seq_line)? == 0 {
95            return Err(AlignError::InvalidFormat("Incomplete FASTQ record".into()));
96        }
97        let seq = parse_dna(seq_line.trim().as_bytes())?;
98
99        // Read plus line
100        plus_line.clear();
101        if self.reader.read_line(&mut plus_line)? == 0 {
102            return Err(AlignError::InvalidFormat("Incomplete FASTQ record".into()));
103        }
104        if !plus_line.trim().starts_with('+') {
105            return Err(AlignError::InvalidFormat(
106                "FASTQ plus line must start with +".into(),
107            ));
108        }
109
110        // Read quality line
111        qual_line.clear();
112        if self.reader.read_line(&mut qual_line)? == 0 {
113            return Err(AlignError::InvalidFormat("Incomplete FASTQ record".into()));
114        }
115        let qual = parse_qual(qual_line.trim().as_bytes())?;
116
117        Ok(Some(FastqRecord { id, seq, qual }))
118    }
119}
120
121/// Iterator over FASTQ records
122impl<R: BufRead> Iterator for FastqReader<R> {
123    type Item = AlignResult<FastqRecord>;
124
125    fn next(&mut self) -> Option<Self::Item> {
126        self.next().transpose()
127    }
128}
129
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a record with the id "read1" from the given bases and scores.
    fn read1(seq: Vec<Nuc>, qual: Vec<u8>) -> FastqRecord {
        FastqRecord::new(String::from("read1"), seq, qual)
    }

    /// A record with matching sequence and quality lengths is valid.
    #[test]
    fn test_record_new() {
        let rec = read1(vec![Nuc::A, Nuc::C, Nuc::G, Nuc::T], vec![30; 4]);
        assert_eq!(rec.id, "read1");
        assert_eq!(rec.len(), 4);
        assert!(rec.validate().is_ok());
    }

    /// Two bases with four quality scores must be rejected by `validate`.
    #[test]
    fn test_record_validate_mismatch() {
        let rec = read1(vec![Nuc::A, Nuc::C], vec![30; 4]);
        assert!(rec.validate().is_err());
    }
}