1
2use anyhow::{Context, Result};
3use flate2::read::MultiGzDecoder;
4use std::fs::File;
5use std::io::{BufRead, BufReader, Read};
6use std::path::Path;
7
/// A single FASTA entry: a record name and its full sequence.
///
/// `PartialEq`/`Eq` are derived so records can be compared directly, e.g.
/// with `assert_eq!` in tests or when de-duplicating parsed input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FastaRecord {
    /// Record name. When produced by `FastaReader` this is the first
    /// whitespace-delimited token following `>` on the header line.
    pub name: String,

    /// Sequence text. When produced by `FastaReader` this is every sequence
    /// line of the record concatenated, with trailing whitespace (including
    /// line terminators) removed from each line.
    pub seq: String,
}
15
16pub struct FastaReader {
17 reader: BufReader<File>,
18 line_buf: String,
19 current_name: Option<String>,
20}
21
22impl FastaReader {
23
24 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
25 let file = File::open(path.as_ref())
26 .with_context(|| format!("Failed to open FASTA: {}", path.as_ref().display()))?;
27 let mut reader = Self {
28 reader: BufReader::with_capacity(1024 * 1024, file),
29 line_buf: String::with_capacity(256),
30 current_name: None,
31 };
32
33 reader.line_buf.clear();
34 if reader.reader.read_line(&mut reader.line_buf)? > 0
35 && reader.line_buf.starts_with('>') {
36 reader.current_name = Some(
37 reader.line_buf[1..]
38 .split_whitespace()
39 .next()
40 .unwrap_or("")
41 .to_string(),
42 );
43 }
44
45 Ok(reader)
46 }
47
48 pub fn read_next(&mut self) -> Result<Option<FastaRecord>> {
49 let name = match self.current_name.take() {
50 Some(n) => n,
51 None => return Ok(None),
52 };
53
54 let mut seq = String::with_capacity(10000);
55
56 loop {
57 self.line_buf.clear();
58 if self.reader.read_line(&mut self.line_buf)? == 0 {
59
60 break;
61 }
62
63 if self.line_buf.starts_with('>') {
64
65 self.current_name = Some(
66 self.line_buf[1..]
67 .split_whitespace()
68 .next()
69 .unwrap_or("")
70 .to_string(),
71 );
72 break;
73 } else {
74
75 seq.push_str(self.line_buf.trim_end());
76 }
77 }
78
79 Ok(Some(FastaRecord { name, seq }))
80 }
81}
82
83impl Iterator for FastaReader {
84 type Item = Result<FastaRecord>;
85
86 fn next(&mut self) -> Option<Self::Item> {
87 match self.read_next() {
88 Ok(Some(record)) => Some(Ok(record)),
89 Ok(None) => None,
90 Err(e) => Some(Err(e)),
91 }
92 }
93}
94
/// A single FASTQ entry: read name, base calls and per-base quality string.
///
/// `PartialEq`/`Eq` are derived so records can be compared directly, e.g.
/// with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FastqRecord {
    /// Read name: the header line without its leading `@` marker and without
    /// trailing whitespace.
    pub name: String,

    /// Sequence line with trailing whitespace removed.
    pub seq: String,

    /// Quality line with trailing whitespace removed.
    pub qual: String,
}
104
/// Streaming FASTQ reader over any byte source `R`.
///
/// The source is wrapped in a `BufReader`, and a single `String` is reused as
/// the line buffer for every line read.
pub struct FastqReader<R>
where
    R: Read,
{
    /// Buffered wrapper around the underlying byte source.
    reader: BufReader<R>,
    /// Scratch buffer holding the most recently read line.
    line_buf: String,
}
109
110impl FastqReader<File> {
111
112 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
113 let file = File::open(path.as_ref())
114 .with_context(|| format!("Failed to open FASTQ: {}", path.as_ref().display()))?;
115 Ok(Self {
116 reader: BufReader::with_capacity(1024 * 1024, file),
117 line_buf: String::with_capacity(512),
118 })
119 }
120}
121
122impl FastqReader<MultiGzDecoder<File>> {
123
124 pub fn open_gz<P: AsRef<Path>>(path: P) -> Result<Self> {
125 let file = File::open(path.as_ref())
126 .with_context(|| format!("Failed to open FASTQ.gz: {}", path.as_ref().display()))?;
127 let decoder = MultiGzDecoder::new(file);
128 Ok(Self {
129 reader: BufReader::with_capacity(1024 * 1024, decoder),
130 line_buf: String::with_capacity(512),
131 })
132 }
133}
134
135impl<R: Read> FastqReader<R> {
136
137 pub fn read_next(&mut self) -> Result<Option<FastqRecord>> {
138
139 self.line_buf.clear();
140 if self.reader.read_line(&mut self.line_buf)? == 0 {
141 return Ok(None);
142 }
143 let name = self.line_buf.trim_start_matches('@').trim_end().to_string();
144 if name.is_empty() {
145 return Ok(None);
146 }
147
148 self.line_buf.clear();
149 self.reader.read_line(&mut self.line_buf)?;
150 let seq = self.line_buf.trim_end().to_string();
151
152 self.line_buf.clear();
153 self.reader.read_line(&mut self.line_buf)?;
154
155 self.line_buf.clear();
156 self.reader.read_line(&mut self.line_buf)?;
157 let qual = self.line_buf.trim_end().to_string();
158
159 Ok(Some(FastqRecord { name, seq, qual }))
160 }
161}
162
163pub enum FastqFile {
164
165 Plain(FastqReader<File>),
166
167 Gzipped(FastqReader<MultiGzDecoder<File>>),
168}
169
170impl FastqFile {
171
172 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
173 let path = path.as_ref();
174 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
175
176 if ext == "gz" {
177 Ok(FastqFile::Gzipped(FastqReader::open_gz(path)?))
178 } else {
179 Ok(FastqFile::Plain(FastqReader::open(path)?))
180 }
181 }
182
183 pub fn read_next(&mut self) -> Result<Option<FastqRecord>> {
184 match self {
185 FastqFile::Plain(r) => r.read_next(),
186 FastqFile::Gzipped(r) => r.read_next(),
187 }
188 }
189}
190
#[cfg(test)]
mod tests {
    use super::*;

    /// A hand-built record carries one quality character per base.
    #[test]
    fn test_fastq_record() {
        let (bases, quals) = ("ATGC", "IIII");
        let rec = FastqRecord {
            name: String::from("read1"),
            seq: String::from(bases),
            qual: String::from(quals),
        };

        let base_count = rec.seq.len();
        let qual_count = rec.qual.len();
        assert_eq!(base_count, qual_count);
    }
}