1use crate::parse_dna;
4use rustalign_common::{AlignError, AlignResult, Nuc};
5use std::fs::File;
6use std::io::{BufRead, BufReader};
7use std::path::Path;
8
9#[derive(Debug, Clone)]
11pub struct FastaRecord {
12 pub id: String,
14
15 pub desc: Option<String>,
17
18 pub seq: Vec<Nuc>,
20}
21
22impl FastaRecord {
23 pub fn new(id: String, seq: Vec<Nuc>) -> Self {
25 Self {
26 id,
27 desc: None,
28 seq,
29 }
30 }
31
32 pub fn len(&self) -> usize {
34 self.seq.len()
35 }
36
37 pub fn is_empty(&self) -> bool {
39 self.seq.is_empty()
40 }
41}
42
/// Streaming FASTA parser over any buffered reader.
///
/// The reader keeps the header and raw sequence bytes of the record that is
/// currently being assembled; the record is emitted once the next header or
/// the end of input is reached.
pub struct FastaReader<R: BufRead> {
    /// Underlying line source.
    reader: R,
    /// Identifier of the record in progress; empty while no record is open.
    current_id: String,
    /// Description of the record in progress, if its header had one.
    current_desc: Option<String>,
    /// Raw (unparsed) sequence bytes collected so far for the record in progress.
    current_seq: Vec<u8>,
}
50
51impl FastaReader<BufReader<File>> {
52 pub fn from_path<P: AsRef<Path>>(path: P) -> AlignResult<Self> {
54 let file = File::open(path)?;
55 Ok(Self::new(BufReader::new(file)))
56 }
57}
58
59impl<R: BufRead> FastaReader<R> {
60 pub fn new(reader: R) -> Self {
62 Self {
63 reader,
64 current_id: String::new(),
65 current_desc: None,
66 current_seq: Vec::new(),
67 }
68 }
69
70 #[allow(clippy::should_implement_trait)]
72 pub fn next(&mut self) -> AlignResult<Option<FastaRecord>> {
73 let mut line = String::new();
74
75 loop {
76 line.clear();
77 let bytes_read = self.reader.read_line(&mut line)?;
78 let trimmed = line.trim();
79
80 if bytes_read == 0 {
81 return if self.current_id.is_empty() {
83 Ok(None)
84 } else {
85 Ok(Some(self.finish_record()?))
86 };
87 }
88
89 if trimmed.is_empty() {
90 continue;
91 }
92
93 if let Some(header) = trimmed.strip_prefix('>') {
94 if self.current_id.is_empty() {
96 if let Some((id, desc)) = header.split_once(char::is_whitespace) {
98 self.current_id = id.to_string();
99 self.current_desc = Some(desc.to_string());
100 } else {
101 self.current_id = header.to_string();
102 self.current_desc = None;
103 }
104 } else {
105 return Ok(Some(self.finish_record_with_header(trimmed)?));
109 }
110 } else {
111 if self.current_id.is_empty() {
113 return Err(AlignError::InvalidFormat(
114 "Sequence data before header".into(),
115 ));
116 }
117 self.current_seq.extend_from_slice(trimmed.as_bytes());
118 }
119 }
120 }
121
122 fn finish_record(&mut self) -> AlignResult<FastaRecord> {
123 let seq = parse_dna(&self.current_seq)?;
124 let id = std::mem::take(&mut self.current_id);
125 let desc = self.current_desc.take();
126 self.current_seq.clear();
127
128 Ok(FastaRecord { id, desc, seq })
129 }
130
131 fn finish_record_with_header(&mut self, next_header: &str) -> AlignResult<FastaRecord> {
132 let record = self.finish_record()?;
133
134 let header = &next_header[1..];
136 if let Some((id, desc)) = header.split_once(char::is_whitespace) {
137 self.current_id = id.to_string();
138 self.current_desc = Some(desc.to_string());
139 } else {
140 self.current_id = header.to_string();
141 self.current_desc = None;
142 }
143
144 Ok(record)
145 }
146}
147
148impl<R: BufRead> Iterator for FastaReader<R> {
150 type Item = AlignResult<FastaRecord>;
151
152 fn next(&mut self) -> Option<Self::Item> {
153 self.next().transpose()
154 }
155}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// `FastaRecord::new` keeps the id and reports the sequence length.
    #[test]
    fn test_record_new() {
        let bases = vec![Nuc::A, Nuc::C, Nuc::G, Nuc::T];
        let rec = FastaRecord::new("seq1".to_string(), bases);

        assert_eq!(rec.id, "seq1");
        assert_eq!(rec.len(), 4);
    }
}