1use std::borrow::Borrow;
33use std::collections::VecDeque;
34use std::iter;
35
36#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)]
44pub struct Finder {
45 start_codons: Vec<VecDeque<u8>>,
46 stop_codons: Vec<VecDeque<u8>>,
47 min_len: usize,
48}
49
50impl Finder {
51 pub fn new<'a>(
54 start_codons: Vec<&'a [u8; 3]>,
55 stop_codons: Vec<&'a [u8; 3]>,
56 min_len: usize,
57 ) -> Self {
58 Finder {
59 start_codons: start_codons
60 .iter()
61 .map(|x| x.iter().copied().collect::<VecDeque<u8>>())
62 .collect(),
63 stop_codons: stop_codons
64 .iter()
65 .map(|x| x.iter().copied().collect::<VecDeque<u8>>())
66 .collect(),
67 min_len,
68 }
69 }
70
71 pub fn find_all<C, T>(&self, seq: T) -> Matches<'_, C, T::IntoIter>
73 where
74 C: Borrow<u8>,
75 T: IntoIterator<Item = C>,
76 {
77 Matches {
78 finder: self,
79 state: State::new(),
80 seq: seq.into_iter().enumerate(),
81 }
82 }
83}
84
85#[derive(
89 Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize,
90)]
91pub struct Orf {
92 pub start: usize,
93 pub end: usize,
94 pub offset: i8,
95}
96
97#[derive(Clone, Debug)]
99struct State {
100 start_pos: [Vec<usize>; 3],
101 codon: VecDeque<u8>,
102 found: VecDeque<Orf>,
103}
104
105impl State {
106 pub fn new() -> Self {
108 State {
109 start_pos: [Vec::new(), Vec::new(), Vec::new()],
110 codon: VecDeque::new(),
111 found: VecDeque::new(),
112 }
113 }
114}
115
116#[derive(Clone, Debug)]
118pub struct Matches<'a, C, T>
119where
120 C: Borrow<u8>,
121 T: Iterator<Item = C>,
122{
123 finder: &'a Finder,
124 state: State,
125 seq: iter::Enumerate<T>,
126}
127
128impl<'a, C, T> Iterator for Matches<'a, C, T>
129where
130 C: Borrow<u8>,
131 T: Iterator<Item = C>,
132{
133 type Item = Orf;
134
135 fn next(&mut self) -> Option<Orf> {
136 let mut offset: usize;
137
138 if !self.state.found.is_empty() {
140 return self.state.found.pop_front();
141 }
142
143 for (index, nuc) in self.seq.by_ref() {
144 if self.state.codon.len() >= 3 {
146 self.state.codon.pop_front();
147 }
148 self.state.codon.push_back(*nuc.borrow());
149 offset = (index + 1) % 3;
150
151 if self.finder.start_codons.contains(&self.state.codon) {
153 self.state.start_pos[offset].push(index);
154 }
155 if !self.state.start_pos[offset].is_empty() {
157 if self.finder.stop_codons.contains(&self.state.codon) {
159 for start_pos in &self.state.start_pos[offset] {
160 if index + 1 - start_pos > self.finder.min_len {
162 self.state.found.push_back(Orf {
164 start: start_pos - 2,
165 end: index + 1,
166 offset: offset as i8,
167 });
168 } else {
170 break;
171 }
172 }
173 self.state.start_pos[offset] = Vec::new();
175 }
176 }
177 if !self.state.found.is_empty() {
178 return self.state.found.pop_front();
179 }
180 }
181 None
182 }
183}
184
185#[cfg(test)]
186mod tests {
187 use super::*;
188
189 fn basic_finder() -> Finder {
190 let start_codons = vec![b"ATG"];
191 let stop_codons = vec![b"TGA", b"TAG", b"TAA"];
192 let min_len = 5;
193 Finder::new(start_codons, stop_codons, min_len)
194 }
195
196 #[test]
197 fn test_no_orf() {
198 let finder = basic_finder();
199 let sequence = b"ACGGCTAGAAAAGGCTAGAAAA";
200 assert!(finder.find_all(sequence).next().is_none());
201 }
202
203 #[test]
204 fn test_one_orf_no_offset() {
205 let finder = basic_finder();
206 let sequence = b"GGGATGGGGTGAGGG";
207 let expected = vec![Orf {
208 start: 3,
209 end: 12,
210 offset: 0,
211 }];
212 assert_eq!(expected, finder.find_all(sequence).collect::<Vec<Orf>>());
213 }
214
215 #[test]
216 fn test_one_orf_with_offset() {
217 let finder = basic_finder();
218 let sequence = b"AGGGATGGGGTGAGGG";
219 let expected = vec![Orf {
220 start: 4,
221 end: 13,
222 offset: 1,
223 }];
224 assert_eq!(expected, finder.find_all(sequence).collect::<Vec<Orf>>());
225 }
226
227 #[test]
228 fn test_two_orfs_different_offsets() {
229 let finder = basic_finder();
230 let sequence = b"ATGGGGTGAGGGGGATGGAAAAATAAG";
231 let expected = vec![
232 Orf {
233 start: 0,
234 end: 9,
235 offset: 0,
236 },
237 Orf {
238 start: 14,
239 end: 26,
240 offset: 2,
241 },
242 ];
243 assert_eq!(expected, finder.find_all(sequence).collect::<Vec<Orf>>());
244 }
245
246 #[test]
247 fn test_three_nested_and_offset_orfs() {
248 let finder = basic_finder();
249 let sequence = b"ATGGGGATGGGGGGATGGAAAAATAAGTAG";
250 let expected = vec![
251 Orf {
252 start: 14,
253 end: 26,
254 offset: 2,
255 },
256 Orf {
257 start: 0,
258 end: 30,
259 offset: 0,
260 },
261 Orf {
262 start: 6,
263 end: 30,
264 offset: 0,
265 },
266 ];
267 assert_eq!(expected, finder.find_all(sequence).collect::<Vec<Orf>>());
268 }
269}