1pub mod algorithm;
2mod builder;
3pub mod iterator;
4
5pub use builder::MinimizerBuilder;
6pub use minimizer_queue::DefaultHashBuilder;
7
8#[cfg(test)]
9mod tests {
10 use super::*;
11 use biotest::Format;
12 use nohash_hasher::BuildNoHashHasher;
13
14 #[test]
15 fn test_minimizer_iter() {
16 let seq = b"TGATTGCACAATC";
17 let minimizer_size = 3;
18 let width = 4;
19 let hasher = BuildNoHashHasher::<u64>::default();
20 let mut min_iter = MinimizerBuilder::new()
21 .minimizer_size(minimizer_size)
22 .width(width)
23 .hasher(hasher)
24 .iter(seq);
25
26 assert_eq!(min_iter.next(), Some((0b001111, 2))); assert_eq!(min_iter.next(), Some((0b010001, 6))); assert_eq!(min_iter.next(), Some((0b000100, 7))); assert_eq!(min_iter.next(), Some((0b000011, 9))); assert_eq!(min_iter.next(), None);
31 }
32
33 #[test]
34 fn test_minimizer_iter_pos() {
35 let seq = b"TGATTGCACAATC";
36 let minimizer_size = 3;
37 let width = 4;
38 let hasher = BuildNoHashHasher::<u64>::default();
39 let mut min_iter = MinimizerBuilder::<u64>::new()
40 .minimizer_size(minimizer_size)
41 .width(width)
42 .hasher(hasher)
43 .iter_pos(seq);
44
45 assert_eq!(min_iter.next(), Some(2)); assert_eq!(min_iter.next(), Some(6)); assert_eq!(min_iter.next(), Some(7)); assert_eq!(min_iter.next(), Some(9)); assert_eq!(min_iter.next(), None);
50 }
51
52 #[test]
53 fn test_mod_minimizer_iter() {
54 let seq = b"TGATTGCACAATC";
55 let minimizer_size = 4;
56 let width = 4;
57 let hasher = BuildNoHashHasher::<u64>::default();
58 let mut min_iter = MinimizerBuilder::new_mod()
59 .minimizer_size(minimizer_size)
60 .width(width)
61 .hasher(hasher)
62 .iter(seq);
63
64 assert_eq!(min_iter.next(), Some((0b00111110, 2))); assert_eq!(min_iter.next(), Some((0b01000100, 6))); assert_eq!(min_iter.next(), Some((0b00010000, 7))); assert_eq!(min_iter.next(), Some((0b00001101, 9))); assert_eq!(min_iter.next(), None);
69 }
70
71 #[test]
72 fn test_mod_minimizer_iter_pos() {
73 let seq = b"TGATTGCACAATC";
74 let minimizer_size = 4;
75 let width = 4;
76 let hasher = BuildNoHashHasher::<u64>::default();
77 let mut min_iter = MinimizerBuilder::<u64, _>::new_mod()
78 .minimizer_size(minimizer_size)
79 .width(width)
80 .hasher(hasher)
81 .iter_pos(seq);
82
83 assert_eq!(min_iter.next(), Some(2)); assert_eq!(min_iter.next(), Some(6)); assert_eq!(min_iter.next(), Some(7)); assert_eq!(min_iter.next(), Some(9)); assert_eq!(min_iter.next(), None);
88 }
89
90 fn gen_seq(len: usize) -> Vec<u8> {
91 let mut rng = biotest::rand();
92 let mut seq = Vec::with_capacity(len);
93 let generator = biotest::Sequence::builder()
94 .sequence_len(len)
95 .build()
96 .unwrap();
97 generator.record(&mut seq, &mut rng).unwrap();
98 seq
99 }
100
101 fn rc(seq: &[u8]) -> Vec<u8> {
102 seq.iter()
103 .rev()
104 .map(|&b| match b {
105 b'A' => b'T',
106 b'a' => b't',
107 b'T' => b'A',
108 b't' => b'a',
109 b'C' => b'G',
110 b'c' => b'g',
111 b'G' => b'C',
112 b'g' => b'c',
113 b => b,
114 })
115 .collect()
116 }
117
118 #[test]
119 fn test_canonical_minimizer_iter() {
120 let seq_len = 1_000_000;
121 let seq = &gen_seq(seq_len);
122 let seq_rc = &rc(seq);
123 let minimizer_size = 21;
124 let width = 11;
125
126 let mins: Vec<u64> = MinimizerBuilder::new()
127 .canonical()
128 .minimizer_size(minimizer_size)
129 .width(width)
130 .iter(seq)
131 .map(|(min, _, _)| min)
132 .collect();
133 let mut mins_rc: Vec<u64> = MinimizerBuilder::new()
134 .canonical()
135 .minimizer_size(minimizer_size)
136 .width(width)
137 .iter(seq_rc)
138 .map(|(min, _, _)| min)
139 .collect();
140 mins_rc.reverse();
141
142 assert_eq!(mins, mins_rc);
143 }
144
145 #[test]
146 fn test_canonical_minimizer_iter_pos() {
147 let seq_len = 1_000_000;
148 let seq = &gen_seq(seq_len);
149 let seq_rc = &rc(seq);
150 let minimizer_size = 21;
151 let width = 11;
152
153 let mins: Vec<_> = MinimizerBuilder::<u64>::new()
154 .canonical()
155 .minimizer_size(minimizer_size)
156 .width(width)
157 .iter_pos(seq)
158 .map(|(pos, _)| pos)
159 .collect();
160 let mut mins_rc: Vec<_> = MinimizerBuilder::<u64>::new()
161 .canonical()
162 .minimizer_size(minimizer_size)
163 .width(width)
164 .iter_pos(seq_rc)
165 .map(|(pos, _)| seq_len - pos - minimizer_size)
166 .collect();
167 mins_rc.reverse();
168
169 assert_eq!(mins, mins_rc);
170 }
171
172 #[test]
173 fn test_canonical_mod_minimizer_iter() {
174 let seq_len = 1_000_000;
175 let seq = &gen_seq(seq_len);
176 let seq_rc = &rc(seq);
177 let minimizer_size = 21;
178 let width = 11;
179
180 let mins: Vec<_> = MinimizerBuilder::<u64, _>::new_mod()
181 .canonical()
182 .minimizer_size(minimizer_size)
183 .width(width)
184 .iter(seq)
185 .map(|(min, _, _)| min)
186 .collect();
187 let mut mins_rc: Vec<_> = MinimizerBuilder::<u64, _>::new_mod()
188 .canonical()
189 .minimizer_size(minimizer_size)
190 .width(width)
191 .iter(seq_rc)
192 .map(|(min, _, _)| min)
193 .collect();
194 mins_rc.reverse();
195
196 assert_eq!(mins, mins_rc);
197 }
198
199 #[test]
200 fn test_canonical_mod_minimizer_iter_pos() {
201 let seq_len = 1_000_000;
202 let seq = &gen_seq(seq_len);
203 let seq_rc = &rc(seq);
204 let minimizer_size = 21;
205 let width = 11;
206 let mins: Vec<_> = MinimizerBuilder::<u64, _>::new_mod()
207 .canonical()
208 .minimizer_size(minimizer_size)
209 .width(width)
210 .iter_pos(seq)
211 .map(|(pos, _)| pos)
212 .collect();
213 let mut mins_rc: Vec<_> = MinimizerBuilder::<u64, _>::new_mod()
214 .canonical()
215 .minimizer_size(minimizer_size)
216 .width(width)
217 .iter_pos(seq_rc)
218 .map(|(pos, _)| seq_len - pos - minimizer_size)
219 .collect();
220 mins_rc.reverse();
221
222 assert_eq!(mins, mins_rc);
223 }
224
225 #[test]
226 fn test_repetitive_minimizer_iter_pos() {
227 const SEQ_LEN: usize = 100;
228 let seq = &[b'A'; SEQ_LEN];
229 let seq_rc = &rc(seq);
230 let minimizer_size = 21;
231 let width = 11;
232
233 let mins: Vec<_> = MinimizerBuilder::<u64>::new()
234 .canonical()
235 .minimizer_size(minimizer_size)
236 .width(width)
237 .iter_pos(seq)
238 .map(|(pos, _)| pos)
239 .collect();
240 let mut mins_rc: Vec<_> = MinimizerBuilder::<u64>::new()
241 .canonical()
242 .minimizer_size(minimizer_size)
243 .width(width)
244 .iter_pos(seq_rc)
245 .map(|(pos, _)| SEQ_LEN - pos - minimizer_size)
246 .collect();
247 mins_rc.reverse();
248
249 assert_eq!(mins, mins_rc);
250 }
251
252 #[test]
253 fn test_repetitive_2_minimizer_iter_pos() {
254 const SEQ_LEN: usize = 100;
255 let seq = &mut [b'A'; SEQ_LEN];
256 for i in (1..SEQ_LEN).step_by(2) {
257 seq[i] = b'G';
258 }
259 let seq_rc = &rc(seq);
260 let minimizer_size = 21;
261 let width = 11;
262
263 let mins: Vec<_> = MinimizerBuilder::<u64>::new()
264 .canonical()
265 .minimizer_size(minimizer_size)
266 .width(width)
267 .iter_pos(seq)
268 .map(|(pos, _)| pos)
269 .collect();
270 let mut mins_rc: Vec<_> = MinimizerBuilder::<u64>::new()
271 .canonical()
272 .minimizer_size(minimizer_size)
273 .width(width)
274 .iter_pos(seq_rc)
275 .map(|(pos, _)| SEQ_LEN - pos - minimizer_size)
276 .collect();
277 mins_rc.reverse();
278
279 assert_eq!(mins, mins_rc);
280 }
281}