1use std::io::Read;
2
3use crate::error::{Error, Result};
4use crate::header::FileHeader;
5use crate::mer::MerDna;
6
/// Streaming reader for fixed-width binary `(key, value)` records.
///
/// Each record consists of a key of `key_buf.len()` bytes immediately followed
/// by a value of `val_buf.len()` bytes. Both buffers are allocated once at
/// construction time and reused for every record.
#[derive(Debug)]
pub struct BinaryReader<R: Read> {
    /// Underlying byte source the records are pulled from.
    reader: R,
    /// Mer length handed to `MerDna::from_bytes` when decoding a key.
    k: usize,
    /// Scratch buffer holding the raw key bytes of the current record.
    key_buf: Vec<u8>,
    /// Scratch buffer holding the raw value bytes of the current record.
    val_buf: Vec<u8>,
}
34
35impl<R: Read> BinaryReader<R> {
36 pub fn new(reader: R, header: &FileHeader) -> Result<Self> {
41 let key_len_bytes = header
42 .key_bytes()
43 .ok_or_else(|| Error::MissingField("key_len".to_string()))?;
44 let val_len_bytes = header
45 .data_val_len()
46 .ok_or_else(|| Error::MissingField("counter_len or val_len".to_string()))?;
47 let k = header
48 .k()
49 .ok_or_else(|| Error::MissingField("key_len".to_string()))?;
50
51 Ok(Self {
52 reader,
53 k,
54 key_buf: vec![0u8; key_len_bytes],
55 val_buf: vec![0u8; val_len_bytes],
56 })
57 }
58
59 pub fn with_params(reader: R, k: usize, key_len_bytes: usize, val_len_bytes: usize) -> Self {
61 Self {
62 reader,
63 k,
64 key_buf: vec![0u8; key_len_bytes],
65 val_buf: vec![0u8; val_len_bytes],
66 }
67 }
68
69 pub fn k(&self) -> usize {
71 self.k
72 }
73
74 fn read_next(&mut self) -> Result<Option<(MerDna, u64)>> {
76 match self.reader.read_exact(&mut self.key_buf) {
78 Ok(()) => {}
79 Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => return Ok(None),
80 Err(e) => return Err(Error::Io(e)),
81 }
82
83 self.reader.read_exact(&mut self.val_buf).map_err(|e| {
85 if e.kind() == std::io::ErrorKind::UnexpectedEof {
86 Error::UnexpectedEof
87 } else {
88 Error::Io(e)
89 }
90 })?;
91
92 let mer = MerDna::from_bytes(&self.key_buf, self.k);
93
94 let mut count = 0u64;
96 for (i, &byte) in self.val_buf.iter().enumerate() {
97 count |= (byte as u64) << (i * 8);
98 }
99
100 Ok(Some((mer, count)))
101 }
102}
103
104impl<R: Read> Iterator for BinaryReader<R> {
105 type Item = Result<(MerDna, u64)>;
106
107 fn next(&mut self) -> Option<Self::Item> {
108 match self.read_next() {
109 Ok(Some(pair)) => Some(Ok(pair)),
110 Ok(None) => None,
111 Err(e) => Some(Err(e)),
112 }
113 }
114}
115
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Serializes one record: the low `(2 * k + 7) / 8` bytes of the mer's
    /// words (least-significant byte first), followed by `val_len`
    /// little-endian bytes of `count`.
    fn encode_record(mer: &MerDna, count: u64, val_len: usize) -> Vec<u8> {
        let mut remaining = (mer.k() * 2 + 7) / 8;
        let mut record = Vec::new();

        'words: for &word in mer.words() {
            for shift in (0..64).step_by(8) {
                if remaining == 0 {
                    break 'words;
                }
                record.push((word >> shift) as u8);
                remaining -= 1;
            }
        }

        record.extend((0..val_len).map(|i| (count >> (i * 8)) as u8));
        record
    }

    /// Wraps an in-memory buffer in a reader with the given record layout.
    fn reader_over(
        data: Vec<u8>,
        k: usize,
        key_bytes: usize,
        val_len: usize,
    ) -> BinaryReader<Cursor<Vec<u8>>> {
        BinaryReader::with_params(Cursor::new(data), k, key_bytes, val_len)
    }

    /// Drains a reader, panicking on the first error.
    fn decode_all(reader: BinaryReader<Cursor<Vec<u8>>>) -> Vec<(MerDna, u64)> {
        reader.map(|item| item.unwrap()).collect()
    }

    /// Parses a DNA string into a mer, panicking on invalid input.
    fn mer_of(seq: &str) -> MerDna {
        seq.parse().unwrap()
    }

    #[test]
    fn test_read_single_record() {
        let val_len = 4;
        let data = encode_record(&mer_of("ACGT"), 42u64, val_len);

        let mut results = reader_over(data, 4, 1, val_len).collect::<Vec<_>>();
        assert_eq!(results.len(), 1);

        let (read_mer, read_count) = results.pop().unwrap().unwrap();
        assert_eq!(read_mer.to_string(), "ACGT");
        assert_eq!(read_count, 42);
    }

    #[test]
    fn test_read_multiple_records() {
        let expected = [("AAAA", 10u64), ("ACGT", 42u64), ("TTTT", 100u64)];
        let val_len = 4;
        let key_bytes = 1;

        let data: Vec<u8> = expected
            .iter()
            .flat_map(|&(seq, count)| encode_record(&mer_of(seq), count, val_len))
            .collect();

        let results = decode_all(reader_over(data, 4, key_bytes, val_len));

        assert_eq!(results.len(), 3);
        for ((mer, count), (seq, want)) in results.iter().zip(expected.iter()) {
            assert_eq!(mer.to_string(), *seq);
            assert_eq!(*count, *want);
        }
    }

    #[test]
    fn test_read_empty() {
        let results: Vec<_> = reader_over(Vec::new(), 4, 1, 4).collect();
        assert_eq!(results.len(), 0);
    }

    #[test]
    fn test_read_truncated_value() {
        // Only the key byte is present; the four value bytes are missing.
        let data = vec![mer_of("ACGT").words()[0] as u8];

        let results: Vec<_> = reader_over(data, 4, 1, 4).collect();
        assert_eq!(results.len(), 1);
        assert!(results[0].is_err());
    }

    #[test]
    fn test_read_large_count() {
        let val_len = 8;
        let data = encode_record(&mer_of("ACGT"), u64::MAX, val_len);

        let results = decode_all(reader_over(data, 4, 1, val_len));
        assert_eq!(results[0].1, u64::MAX);
    }

    #[test]
    fn test_read_longer_kmer() {
        let seq = "ACGTACGTACGTACGTACGTACGTA";
        let val_len = 4;
        let key_bytes = 7;
        let data = encode_record(&mer_of(seq), 99u64, val_len);

        let results = decode_all(reader_over(data, 25, key_bytes, val_len));
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0.to_string(), seq);
        assert_eq!(results[0].1, 99);
    }

    #[test]
    fn test_read_small_val_len() {
        let val_len = 1;
        let data = encode_record(&mer_of("ACGT"), 255u64, val_len);

        let results = decode_all(reader_over(data, 4, 1, val_len));
        assert_eq!(results[0].1, 255);
    }

    #[test]
    fn test_read_two_byte_count() {
        let val_len = 2;
        let data = encode_record(&mer_of("ACGT"), 1000u64, val_len);

        let results = decode_all(reader_over(data, 4, 1, val_len));
        assert_eq!(results[0].1, 1000);
    }
}