1use std::io::BufRead;
2
3use crate::error::{Error, Result};
4use crate::mer::MerDna;
5
/// Streaming reader for a text dump in which each line holds a mer and its
/// count separated by whitespace.
///
/// The reader owns the underlying buffered source and a single scratch
/// `String`, so reading a line does not allocate a new buffer each time.
pub struct TextReader<R>
where
    R: BufRead,
{
    /// Buffered input the lines are pulled from.
    reader: R,
    /// Scratch buffer holding the most recently read line; cleared before each read.
    line_buf: String,
}
31
32impl<R: BufRead> TextReader<R> {
33 pub fn new(reader: R) -> Self {
35 Self {
36 reader,
37 line_buf: String::new(),
38 }
39 }
40
41 fn read_next(&mut self) -> Result<Option<(MerDna, u64)>> {
42 self.line_buf.clear();
43 let bytes_read = self.reader.read_line(&mut self.line_buf)?;
44 if bytes_read == 0 {
45 return Ok(None);
46 }
47
48 let line = self.line_buf.trim();
49 if line.is_empty() {
50 return Ok(None);
51 }
52
53 let (mer_str, count_str) = line
55 .split_once(|c: char| c.is_whitespace())
56 .ok_or_else(|| Error::InvalidHeader(format!("invalid text format line: {line:?}")))?;
57
58 let mer: MerDna = mer_str.parse()?;
59 let count: u64 = count_str
60 .trim()
61 .parse()
62 .map_err(|_| Error::InvalidHeader(format!("invalid count value: {count_str:?}")))?;
63
64 Ok(Some((mer, count)))
65 }
66}
67
68impl<R: BufRead> Iterator for TextReader<R> {
69 type Item = Result<(MerDna, u64)>;
70
71 fn next(&mut self) -> Option<Self::Item> {
72 match self.read_next() {
73 Ok(Some(pair)) => Some(Ok(pair)),
74 Ok(None) => None,
75 Err(e) => Some(Err(e)),
76 }
77 }
78}
79
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Builds a `TextReader` over an in-memory copy of `content`.
    fn make_text_reader(content: &str) -> TextReader<std::io::BufReader<Cursor<Vec<u8>>>> {
        let cursor = Cursor::new(content.as_bytes().to_vec());
        TextReader::new(std::io::BufReader::new(cursor))
    }

    #[test]
    fn test_read_single_line() {
        let reader = make_text_reader("ACGT 42\n");
        let results: Vec<_> = reader.map(|r| r.unwrap()).collect();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0.to_string(), "ACGT");
        assert_eq!(results[0].1, 42);
    }

    #[test]
    fn test_read_multiple_lines() {
        let reader = make_text_reader("AAAA 10\nACGT 42\nTTTT 100\n");
        let results: Vec<_> = reader.map(|r| r.unwrap()).collect();
        assert_eq!(results.len(), 3);
        assert_eq!(results[0].0.to_string(), "AAAA");
        assert_eq!(results[0].1, 10);
        assert_eq!(results[1].0.to_string(), "ACGT");
        assert_eq!(results[1].1, 42);
        assert_eq!(results[2].0.to_string(), "TTTT");
        assert_eq!(results[2].1, 100);
    }

    #[test]
    fn test_read_tab_separated() {
        let reader = make_text_reader("ACGT\t42\n");
        let results: Vec<_> = reader.map(|r| r.unwrap()).collect();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0.to_string(), "ACGT");
        assert_eq!(results[0].1, 42);
    }

    #[test]
    fn test_read_empty() {
        let reader = make_text_reader("");
        let results: Vec<_> = reader.collect();
        assert_eq!(results.len(), 0);
    }

    #[test]
    fn test_read_no_trailing_newline() {
        let reader = make_text_reader("ACGT 42");
        let results: Vec<_> = reader.map(|r| r.unwrap()).collect();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0.to_string(), "ACGT");
        assert_eq!(results[0].1, 42);
    }

    #[test]
    fn test_read_large_count() {
        let reader = make_text_reader("ACGT 18446744073709551615\n");
        let results: Vec<_> = reader.map(|r| r.unwrap()).collect();
        // Check the length first so a missing record fails with a clear
        // assertion instead of an index-out-of-bounds panic.
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].1, u64::MAX);
    }

    #[test]
    fn test_read_longer_kmer() {
        let reader = make_text_reader("ACGTACGTACGTACGTACGTACGTA 99\n");
        let results: Vec<_> = reader.map(|r| r.unwrap()).collect();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0.to_string(), "ACGTACGTACGTACGTACGTACGTA");
        assert_eq!(results[0].0.k(), 25);
        assert_eq!(results[0].1, 99);
    }

    #[test]
    fn test_invalid_count() {
        let reader = make_text_reader("ACGT notanumber\n");
        let results: Vec<_> = reader.collect();
        assert_eq!(results.len(), 1);
        assert!(results[0].is_err());
    }

    #[test]
    fn test_invalid_kmer() {
        let reader = make_text_reader("ACGN 42\n");
        let results: Vec<_> = reader.collect();
        assert_eq!(results.len(), 1);
        assert!(results[0].is_err());
    }

    #[test]
    fn test_malformed_line() {
        let reader = make_text_reader("justoneword\n");
        let results: Vec<_> = reader.collect();
        assert_eq!(results.len(), 1);
        assert!(results[0].is_err());
    }

    #[test]
    fn test_read_with_extra_whitespace() {
        // Several separator spaces plus trailing spaces: the line is trimmed
        // and the count is trimmed again before parsing, so both are accepted.
        let reader = make_text_reader("ACGT    42  \n");
        let results: Vec<_> = reader.map(|r| r.unwrap()).collect();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0.to_string(), "ACGT");
        assert_eq!(results[0].1, 42);
    }
}