genomicframe_core/formats/fasta/
reader.rs1use crate::core::{GenomicReader, GenomicRecordIterator};
6use crate::error::Result;
7use std::fs::File;
8use std::io::{BufRead, BufReader};
9use std::path::Path;
10
/// A single FASTA entry: the header fields plus the sequence text.
///
/// Derives the common value-type traits so records can be compared,
/// hashed, and default-constructed (useful in tests and collections).
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct FastaRecord {
    /// Identifier portion of the header line.
    pub id: String,
    /// Remainder of the header line after the identifier, when present.
    pub description: Option<String>,
    /// Sequence text belonging to this record.
    pub sequence: String,
}

impl FastaRecord {
    /// Rebuilds the header text (without a leading `>`).
    ///
    /// Returns `id` alone when there is no description, otherwise `id` and
    /// `description` joined by a single space.
    pub fn header(&self) -> String {
        match self.description.as_deref() {
            Some(desc) => format!("{} {}", self.id, desc),
            None => self.id.clone(),
        }
    }

    /// Returns the length of `sequence` in bytes.
    pub fn len(&self) -> usize {
        self.sequence.len()
    }

    /// Returns `true` when `sequence` contains no characters.
    pub fn is_empty(&self) -> bool {
        self.sequence.is_empty()
    }
}
41
/// Streaming FASTA reader over a buffered input source.
///
/// Implements `Debug` (when `R` does) so a reader can be inspected in logs
/// and test failures.
#[derive(Debug)]
pub struct FastaReader<R: BufRead> {
    /// Underlying buffered input the lines are read from.
    reader: R,
    /// One-line lookahead: a line already read from `reader` but not yet
    /// consumed. `None` when nothing is buffered.
    current_line: Option<String>,
}
47
48impl FastaReader<BufReader<File>> {
49 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
51 let file = File::open(path)?;
52 let reader = BufReader::new(file);
53 Ok(Self::new(reader))
54 }
55}
56
57impl<R: BufRead> FastaReader<R> {
58 pub fn new(reader: R) -> Self {
60 Self {
61 reader,
62 current_line: None,
63 }
64 }
65
66 fn peek_line(&mut self) -> Result<Option<&str>> {
68 if self.current_line.is_none() {
69 let mut line = String::new();
70 let bytes_read = self.reader.read_line(&mut line)?;
71 if bytes_read == 0 {
72 return Ok(None);
73 }
74 self.current_line = Some(line);
75 }
76 Ok(self.current_line.as_deref())
77 }
78
79 fn consume_line(&mut self) -> Option<String> {
81 self.current_line.take()
82 }
83}
84
85impl<R: BufRead> GenomicRecordIterator for FastaReader<R> {
86 type Record = FastaRecord;
87
88 fn next_raw(&mut self) -> Result<Option<Vec<u8>>> {
89 Ok(None)
91 }
92
93 fn next_record(&mut self) -> Result<Option<Self::Record>> {
94 loop {
96 let line = match self.peek_line()? {
97 Some(line) => line.to_string(),
98 None => return Ok(None),
99 };
100
101 if line.starts_with('>') {
102 self.consume_line();
103
104 let header = line.trim_start_matches('>').trim();
106 let (id, description) = match header.split_once(' ') {
107 Some((id, desc)) => (id.to_string(), Some(desc.to_string())),
108 None => (header.to_string(), None),
109 };
110
111 let mut sequence = String::new();
113 loop {
114 match self.peek_line()? {
115 Some(line) if !line.starts_with('>') => {
116 let line = self.consume_line().unwrap();
117 sequence.push_str(line.trim());
118 }
119 _ => break,
120 }
121 }
122
123 return Ok(Some(FastaRecord {
124 id,
125 description,
126 sequence,
127 }));
128 } else {
129 self.consume_line();
130 }
131 }
132 }
133}
134
135impl<R: BufRead> GenomicReader for FastaReader<R> {
136 type Metadata = ();
137
138 fn metadata(&self) -> &Self::Metadata {
139 &() }
141}
142
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Parses a two-record input (one multi-line sequence with a description,
    /// one single-line sequence without) and checks every parsed field, then
    /// checks that the reader reports end of input.
    #[test]
    fn test_fasta_parsing() {
        let input = ">seq1 description here\n\
                     ACGTACGTACGT\n\
                     ACGTACGT\n\
                     >seq2\n\
                     GGGGCCCC\n";
        let mut reader = FastaReader::new(Cursor::new(input));

        // (id, description, sequence) for each record, in file order.
        let expected = [
            ("seq1", Some("description here"), "ACGTACGTACGTACGTACGT"),
            ("seq2", None, "GGGGCCCC"),
        ];

        for &(id, description, sequence) in &expected {
            let record = reader.next_record().unwrap().unwrap();
            assert_eq!(record.id, id);
            assert_eq!(record.description.as_deref(), description);
            assert_eq!(record.sequence, sequence);
        }

        assert!(reader.next_record().unwrap().is_none());
    }
}