1use crate::{parse_dna, parse_qual};
4use rustalign_common::{AlignError, AlignResult, Nuc};
5use std::fs::File;
6use std::io::{BufRead, BufReader};
7use std::path::Path;
8
9#[derive(Debug, Clone)]
11pub struct FastqRecord {
12 pub id: String,
14
15 pub seq: Vec<Nuc>,
17
18 pub qual: Vec<u8>,
20}
21
22impl FastqRecord {
23 pub fn new(id: String, seq: Vec<Nuc>, qual: Vec<u8>) -> Self {
25 Self { id, seq, qual }
26 }
27
28 pub fn len(&self) -> usize {
30 self.seq.len()
31 }
32
33 pub fn is_empty(&self) -> bool {
35 self.seq.is_empty()
36 }
37
38 pub fn validate(&self) -> AlignResult<()> {
40 if self.seq.len() != self.qual.len() {
41 return Err(AlignError::InvalidFormat(format!(
42 "Sequence length {} != quality length {}",
43 self.seq.len(),
44 self.qual.len()
45 )));
46 }
47 Ok(())
48 }
49}
50
/// Reads FASTQ records from any buffered reader.
pub struct FastqReader<R>
where
    R: BufRead,
{
    /// Underlying buffered input the records are read from, line by line.
    reader: R,
}
55
56impl FastqReader<BufReader<File>> {
57 pub fn from_path<P: AsRef<Path>>(path: P) -> AlignResult<Self> {
59 let file = File::open(path)?;
60 Ok(Self::new(BufReader::new(file)))
61 }
62}
63
64impl<R: BufRead> FastqReader<R> {
65 pub fn new(reader: R) -> Self {
67 Self { reader }
68 }
69
70 #[allow(clippy::should_implement_trait)]
72 pub fn next(&mut self) -> AlignResult<Option<FastqRecord>> {
73 let mut header = String::new();
74 let mut seq_line = String::new();
75 let mut plus_line = String::new();
76 let mut qual_line = String::new();
77
78 header.clear();
80 if self.reader.read_line(&mut header)? == 0 {
81 return Ok(None);
82 }
83
84 let header = header.trim();
85 if !header.starts_with('@') {
86 return Err(AlignError::InvalidFormat(
87 "FASTQ header must start with @".into(),
88 ));
89 }
90 let id = header[1..].to_string();
91
92 seq_line.clear();
94 if self.reader.read_line(&mut seq_line)? == 0 {
95 return Err(AlignError::InvalidFormat("Incomplete FASTQ record".into()));
96 }
97 let seq = parse_dna(seq_line.trim().as_bytes())?;
98
99 plus_line.clear();
101 if self.reader.read_line(&mut plus_line)? == 0 {
102 return Err(AlignError::InvalidFormat("Incomplete FASTQ record".into()));
103 }
104 if !plus_line.trim().starts_with('+') {
105 return Err(AlignError::InvalidFormat(
106 "FASTQ plus line must start with +".into(),
107 ));
108 }
109
110 qual_line.clear();
112 if self.reader.read_line(&mut qual_line)? == 0 {
113 return Err(AlignError::InvalidFormat("Incomplete FASTQ record".into()));
114 }
115 let qual = parse_qual(qual_line.trim().as_bytes())?;
116
117 Ok(Some(FastqRecord { id, seq, qual }))
118 }
119}
120
121impl<R: BufRead> Iterator for FastqReader<R> {
123 type Item = AlignResult<FastqRecord>;
124
125 fn next(&mut self) -> Option<Self::Item> {
126 self.next().transpose()
127 }
128}
129
#[cfg(test)]
mod tests {
    use super::*;

    /// A record with matching sequence and quality lengths reports its
    /// fields correctly and passes validation.
    #[test]
    fn test_record_new() {
        let bases = vec![Nuc::A, Nuc::C, Nuc::G, Nuc::T];
        let quals = vec![30u8; 4];
        let record = FastqRecord::new(String::from("read1"), bases, quals);

        assert_eq!(record.id, "read1");
        assert_eq!(record.len(), 4);
        assert!(record.validate().is_ok());
    }

    /// Two bases against four quality values must fail validation.
    #[test]
    fn test_record_validate_mismatch() {
        let bases = vec![Nuc::A, Nuc::C];
        let quals = vec![30u8; 4];
        let record = FastqRecord::new(String::from("read1"), bases, quals);

        assert!(record.validate().is_err());
    }
}