1use crate::common::create_reader_with_compression;
2use std::fs::File;
3use std::io::{BufRead, Read, Result};
4use std::path::Path;
5
/// A single parsed FASTQ record.
///
/// All fields are plain `String`s, so the type derives `Eq` and `Hash` in
/// addition to `PartialEq`; records can therefore be de-duplicated in a
/// `HashSet` or used as `HashMap` keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FastqRecord {
    /// Header line with the leading `@` removed and surrounding whitespace trimmed.
    pub id: String,
    /// Sequence text; multi-line sequences are concatenated into one string.
    pub sequence: String,
    /// Quality string; has the same byte length as `sequence`.
    pub quality: String,
}
13
/// Line-oriented FASTQ parser state.
///
/// Holds the underlying line iterator together with the capacity used when
/// allocating the sequence buffer of each record.
pub struct FastqReader {
    /// Initial capacity, in bytes, of the `String` that accumulates a record's sequence.
    sequence_size_hint: usize,
    /// Iterator over the lines of the (boxed, buffered) input stream.
    lines: std::io::Lines<std::io::BufReader<Box<dyn Read + Send>>>,
}
19
20impl FastqReader {
21 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
23 Self::from_file_with_capacity(path, 64 * 1024)
24 }
25
26 pub fn from_file_with_capacity<P: AsRef<Path>>(
32 path: P,
33 sequence_size_hint: usize,
34 ) -> Result<Self> {
35 let file = File::open(path)?;
36 Self::from_reader_with_capacity(file, sequence_size_hint)
37 }
38
39 pub fn from_stdin() -> Result<Self> {
41 Self::from_stdin_with_capacity(64 * 1024)
42 }
43
44 pub fn from_stdin_with_capacity(sequence_size_hint: usize) -> Result<Self> {
46 let stdin = std::io::stdin();
47 Self::from_reader_with_capacity(stdin, sequence_size_hint)
48 }
49
50 pub fn from_reader_with_capacity<R: Read + Send + 'static>(
52 reader: R,
53 sequence_size_hint: usize,
54 ) -> Result<Self> {
55 let buf_reader = create_reader_with_compression(reader)?;
56 let lines = buf_reader.lines();
57
58 Ok(FastqReader {
59 lines,
60 sequence_size_hint: sequence_size_hint.max(64),
61 })
62 }
63
64 fn read_next(&mut self) -> Result<Option<FastqRecord>> {
65 let id = loop {
67 match self.lines.next() {
68 Some(Ok(line)) => {
69 if line.is_empty() || line.chars().all(|c| c.is_whitespace()) {
70 continue;
71 }
72 let trimmed = line.trim();
73 if !trimmed.starts_with('@') {
74 return Err(std::io::Error::new(
75 std::io::ErrorKind::InvalidData,
76 "FASTQ record must start with '@'",
77 ));
78 }
79 break trimmed[1..].to_string();
80 }
81 Some(Err(e)) => return Err(e),
82 None => return Ok(None),
83 }
84 };
85
86 let mut sequence = String::with_capacity(self.sequence_size_hint);
88 let plus_line = loop {
89 match self.lines.next() {
90 Some(Ok(line)) => {
91 let trimmed = line.trim();
92 if trimmed.starts_with('+') {
93 break trimmed.to_string();
94 }
95 if !line.is_empty() && !line.chars().all(|c| c.is_whitespace()) {
96 sequence.push_str(trimmed);
97 }
98 }
99 Some(Err(e)) => return Err(e),
100 None => {
101 return Err(std::io::Error::new(
102 std::io::ErrorKind::UnexpectedEof,
103 "Unexpected end of file while reading FASTQ sequence",
104 ));
105 }
106 }
107 };
108
109 if plus_line.len() > 1 {
111 let plus_id = &plus_line[1..];
112 if plus_id != id {
113 return Err(std::io::Error::new(
114 std::io::ErrorKind::InvalidData,
115 format!(
116 "FASTQ '+' line ID '{}' does not match header ID '{}'",
117 plus_id, id
118 ),
119 ));
120 }
121 }
122
123 let mut quality = String::with_capacity(sequence.len());
125 let sequence_len = sequence.len();
126
127 while quality.len() < sequence_len {
128 match self.lines.next() {
129 Some(Ok(line)) => {
130 let trimmed = line.trim();
131 if !line.is_empty() && !line.chars().all(|c| c.is_whitespace()) {
132 let needed = sequence_len - quality.len();
134 let to_add = if trimmed.len() <= needed {
135 trimmed
136 } else {
137 &trimmed[..needed]
138 };
139 quality.push_str(to_add);
140 }
141 }
142 Some(Err(e)) => return Err(e),
143 None => {
144 return Err(std::io::Error::new(
145 std::io::ErrorKind::UnexpectedEof,
146 "Unexpected end of file while reading FASTQ quality scores",
147 ));
148 }
149 }
150 }
151
152 if sequence.len() != quality.len() {
154 return Err(std::io::Error::new(
155 std::io::ErrorKind::InvalidData,
156 format!(
157 "FASTQ sequence length ({}) does not match quality length ({})",
158 sequence.len(),
159 quality.len()
160 ),
161 ));
162 }
163
164 Ok(Some(FastqRecord {
165 id,
166 sequence,
167 quality,
168 }))
169 }
170}
171
172impl Iterator for FastqReader {
173 type Item = Result<FastqRecord>;
174
175 fn next(&mut self) -> Option<Self::Item> {
176 match self.read_next() {
177 Ok(Some(record)) => Some(Ok(record)),
178 Ok(None) => None,
179 Err(e) => Some(Err(e)),
180 }
181 }
182}
183
184pub fn read_fastq<P: AsRef<Path>>(path: P) -> Result<Vec<FastqRecord>> {
185 read_fastq_with_capacity(path, 64 * 1024)
186}
187
188pub fn read_fastq_with_capacity<P: AsRef<Path>>(
189 path: P,
190 sequence_size_hint: usize,
191) -> Result<Vec<FastqRecord>> {
192 let reader = FastqReader::from_file_with_capacity(path, sequence_size_hint)?;
193 reader.collect()
194}