1#![deny(warnings, missing_docs)]
2use nom::{
23 branch::alt,
24 bytes::complete::{tag, take_until},
25 character::complete::{alpha1, char, digit1, line_ending, multispace1, not_line_ending},
26 error::Error,
27 multi::{many0, many1},
28 sequence::{pair, tuple},
29 Err, IResult,
30};
31
/// A repeat together with the spacer that follows it on the same report line.
///
/// The coordinate fields are described as the parser in this file fills them:
/// zero-based, with exclusive end positions, derived from the one-based
/// position printed at the start of the line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepeatSpacer<'a> {
    /// Repeat sequence, borrowed from the parsed input.
    pub repeat: &'a str,
    /// Spacer sequence, borrowed from the parsed input.
    pub spacer: &'a str,
    /// Zero-based start of the repeat/spacer pair (printed position minus one).
    pub start: usize,
    /// Exclusive end of the pair: `start + repeat.len() + spacer.len()`.
    pub end: usize,
    /// Zero-based start of the spacer; the same value as `repeat_end`.
    pub spacer_start: usize,
    /// Exclusive end of the spacer; the same value as `end`.
    pub spacer_end: usize,
    /// Zero-based start of the repeat; the same value as `start`.
    pub repeat_start: usize,
    /// Exclusive end of the repeat: `start + repeat.len()`.
    pub repeat_end: usize,
}
52
/// A repeat that has no spacer after it on its report line.
///
/// In the fixtures of this file this is the last repeat line of an array.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepeatOnly<'a> {
    /// Repeat sequence, borrowed from the parsed input.
    pub repeat: &'a str,
    /// Zero-based start of the repeat (printed position minus one).
    pub start: usize,
    /// Exclusive end of the repeat: `start + repeat.len()`.
    pub end: usize,
}
63
64#[derive(Debug, PartialEq)]
66pub enum Repeat<'a> {
67 WithSpacer(RepeatSpacer<'a>),
69 WithoutSpacer(RepeatOnly<'a>),
71}
72
73#[derive(Debug, PartialEq)]
74pub struct Array<'a> {
76 pub order: usize,
78 pub start: usize,
80 pub end: usize,
82 pub repeat_spacers: Vec<Repeat<'a>>,
84}
85
86#[derive(Debug)]
87pub struct Contig<'a> {
89 pub accession: &'a str,
91 pub bp: usize,
93 pub arrays: Vec<Array<'a>>,
95}
96
97pub fn parse(input: &str) -> Result<Vec<Contig>, Err<Error<&str>>> {
99 let result = many0(parse_contig_arrays)(input);
100 match result {
101 Ok((_, contigs)) => Ok(contigs),
102 Err(e) => Err(e),
103 }
104}
105
106fn parse_contig_arrays(input: &str) -> IResult<&str, Contig> {
108 let result = tuple((
109 parse_accession_line,
110 skip_empty_line,
111 many1(parse_array),
112 parse_footer,
113 ))(input);
114 match result {
115 Ok((remainder, ((accession, bp), _, arrays, _))) => Ok((
116 remainder,
117 Contig {
118 accession,
119 bp,
120 arrays,
121 },
122 )),
123 Err(e) => Err(e),
124 }
125}
126
127fn parse_array(input: &str) -> IResult<&str, Array> {
129 let result = tuple((
130 skip_empty_line,
131 parse_crispr_order_and_coordinates,
132 skip_empty_line,
133 skip_one_line,
134 skip_one_line,
135 many1(parse_repeat_spacer_line),
136 skip_one_line,
137 skip_one_line,
138 ))(input);
139 match result {
140 Ok((remainder, (_, (order, start, end), _, _, _, repeat_spacers, _, _))) => Ok((
141 remainder,
142 Array {
143 order,
144 start,
145 end,
146 repeat_spacers,
147 },
148 )),
149 Err(e) => Err(e),
150 }
151}
152
153fn skip_one_line(input: &str) -> IResult<&str, ()> {
155 let result = pair(not_line_ending, line_ending)(input);
156 match result {
157 Ok((remaining, _)) => Ok((remaining, ())),
158 Err(e) => Err(e),
159 }
160}
161
162fn skip_empty_line(input: &str) -> IResult<&str, ()> {
164 let result = line_ending(input);
165 match result {
166 Ok((remaining, _)) => Ok((remaining, ())),
167 Err(e) => Err(e),
168 }
169}
170
171fn parse_footer(input: &str) -> IResult<&str, ()> {
173 let result = tuple((
174 skip_empty_line,
175 skip_one_line,
176 skip_empty_line,
177 skip_empty_line,
178 ))(input);
179 match result {
180 Ok((remainder, _)) => Ok((remainder, ())),
181 Err(e) => Err(e),
182 }
183}
184
185fn parse_crispr_order_and_coordinates(input: &str) -> IResult<&str, (usize, usize, usize)> {
188 let result = tuple((
189 tag("CRISPR"),
190 char(' '),
191 digit1,
192 multispace1,
193 tag("Range:"),
194 char(' '),
195 digit1,
196 tag(" - "),
197 digit1,
198 ))(input);
199 match result {
200 Ok((remaining, (_, _, raw_order, _, _, _, start, _, end))) => Ok((
201 remaining,
202 (
203 raw_order.parse::<usize>().unwrap() - 1,
204 start.parse::<usize>().unwrap() - 1,
205 end.parse::<usize>().unwrap(),
206 ),
207 )),
208 Err(e) => Err(e),
209 }
210}
211
212fn parse_accession_line(input: &str) -> IResult<&str, (&str, usize)> {
214 let result = tuple((
215 tag("Sequence '"),
216 take_until("'"),
217 tag("'"),
218 char(' '),
219 tag("("),
220 take_until(" "),
221 tag(" bp)"),
222 ))(input);
223 match result {
224 Ok((remainder, (_, accession, _, _, _, bp, _))) => {
225 Ok((remainder, (accession, bp.parse::<usize>().unwrap())))
226 }
227 Err(e) => Err(e),
228 }
229}
230
231fn parse_repeat_spacer_line(input: &str) -> IResult<&str, Repeat> {
233 alt((parse_repeat_with_spacer, parse_repeat_only))(input)
234}
235
236fn parse_repeat_only(input: &str) -> IResult<&str, Repeat> {
238 let result = tuple((digit1, multispace1, alpha1, multispace1))(input);
239 match result {
240 Ok((remaining, (raw_start, _, repeat, _))) => {
241 let start = raw_start.parse::<usize>().unwrap() - 1;
242 Ok((
243 remaining,
244 Repeat::WithoutSpacer(RepeatOnly {
245 repeat,
246 start,
247 end: start + repeat.len(),
248 }),
249 ))
250 }
251 Err(e) => Err(e),
252 }
253}
254
255fn parse_repeat_with_spacer(input: &str) -> IResult<&str, Repeat> {
257 let result = tuple((
258 digit1,
259 multispace1,
260 alpha1,
261 multispace1,
262 alpha1,
263 not_line_ending,
264 line_ending,
265 ))(input);
266 match result {
267 Ok((remaining, (raw_start, _, repeat, _, spacer, _, _))) => {
268 let start = raw_start.parse::<usize>().unwrap() - 1;
269 Ok((
270 remaining,
271 Repeat::WithSpacer(RepeatSpacer {
272 repeat,
273 spacer,
274 start,
275 end: start + repeat.len() + spacer.len(),
276 repeat_start: start,
277 repeat_end: start + repeat.len(),
278 spacer_start: start + repeat.len(),
279 spacer_end: start + repeat.len() + spacer.len(),
280 }),
281 ))
282 }
283 Err(e) => Err(e),
284 }
285}
286
#[cfg(test)]
mod tests {
    use super::*;

    /// A whole array section yields zero-based, end-exclusive coordinates for
    /// every repeat line.
    #[test]
    fn test_parse_array() {
        let input = "\nCRISPR 1 Range: 10648 - 10814
POSITION REPEAT SPACER
-------- ----------------------------- ----------------------------------------
10648 CAAGTGCACCAACCAATCTCACCACCTCA GGGGGTGCACTTAAAGGGGGTGCACTTGTCTCAAGTGCACCAAGAA [ 29, 46 ]
10723 CAAGTGCACCAACCAATCTCACCACCTCA CCATCTCACCACCTCTCAGGGGGTGCAGTTGTCT [ 29, 34 ]
10786 CAAGTGCACCAACCAATCTCACCACCTCA
-------- ----------------------------- ----------------------------------------
Repeats: 3 Average Length: 29 Average Length: 40\n";
        // All three lines of this fixture share the same repeat sequence.
        let repeat = "CAAGTGCACCAACCAATCTCACCACCTCA";
        let first = Repeat::WithSpacer(RepeatSpacer {
            repeat,
            spacer: "GGGGGTGCACTTAAAGGGGGTGCACTTGTCTCAAGTGCACCAAGAA",
            start: 10647,
            end: 10722,
            spacer_start: 10676,
            spacer_end: 10722,
            repeat_start: 10647,
            repeat_end: 10676,
        });
        let second = Repeat::WithSpacer(RepeatSpacer {
            repeat,
            spacer: "CCATCTCACCACCTCTCAGGGGGTGCAGTTGTCT",
            start: 10722,
            end: 10785,
            spacer_start: 10751,
            spacer_end: 10785,
            repeat_start: 10722,
            repeat_end: 10751,
        });
        let last = Repeat::WithoutSpacer(RepeatOnly {
            repeat,
            start: 10785,
            end: 10814,
        });
        let expected = Array {
            order: 0,
            start: 10647,
            end: 10814,
            repeat_spacers: vec![first, second, last],
        };

        let (_, actual) = parse_array(input).unwrap();

        assert_eq!(expected, actual);
    }

    /// The header numbers are shifted to zero-based, except for the range end.
    #[test]
    fn test_parse_crispr_order_and_coordinates() {
        let (_, actual) =
            parse_crispr_order_and_coordinates(r"CRISPR 1 Range: 1214 - 1776").unwrap();

        assert_eq!((0, 1213, 1776), actual);
    }

    /// The accession is the quoted text and the size is the number before ` bp)`.
    #[test]
    fn test_parse_accession_line() {
        let (_, actual) = parse_accession_line("Sequence 'MGYG000166779_38' (12280 bp)").unwrap();

        assert_eq!(("MGYG000166779_38", 12280), actual);
    }

    /// A line with repeat and spacer is reported as `Repeat::WithSpacer`.
    #[test]
    fn test_parse_repeat_spacer() {
        let input = "10723 CAAGTGCACCAACCAATCTCACCACCTCA CCATCTCACCACCTCTCAGGGGGTGCAGTTGTCT [ 29, 34 ]\n";
        let expected = Repeat::WithSpacer(RepeatSpacer {
            repeat: "CAAGTGCACCAACCAATCTCACCACCTCA",
            spacer: "CCATCTCACCACCTCTCAGGGGGTGCAGTTGTCT",
            start: 10722,
            end: 10785,
            spacer_start: 10751,
            spacer_end: 10785,
            repeat_start: 10722,
            repeat_end: 10751,
        });

        let (_, actual) = parse_repeat_spacer_line(input).unwrap();

        // Comparing the enum values checks the variant and the payload at once.
        assert_eq!(expected, actual);
    }

    /// A line with a repeat only is reported as `Repeat::WithoutSpacer`.
    #[test]
    fn test_parse_repeat_only_line() {
        let input = "10786 CAAGTGCACCAACCAATCTCACCACCTCA\n";
        let expected = Repeat::WithoutSpacer(RepeatOnly {
            repeat: "CAAGTGCACCAACCAATCTCACCACCTCA",
            start: 10785,
            end: 10814,
        });

        let (_, actual) = parse_repeat_spacer_line(input).unwrap();

        // Comparing the enum values checks the variant and the payload at once.
        assert_eq!(expected, actual);
    }

    /// One record holding two arrays is parsed into a single contig.
    #[test]
    fn test_parse_contig_arrays() {
        let input = "Sequence 'MGYG000242676_4' (164254 bp)

CRISPR 3 Range: 60487 - 61025
POSITION REPEAT SPACER
-------- ------------------------------------ --------------------------
60487 TTTAATAACCCTATATAATTTCTACTATTGTAGATA TCTCCTTTGTAACTTCTTTGATTCGG [ 36, 26 ]
60549 TTTAATAACCCTATATAATTTCTACTGTCGTAGATA TTGTTCTTTTATATGTGTACATAGCTAGA [ 36, 29 ]
60990 TTTAATAACCCTATATAATTTCTACTTTTTTGATTA
-------- ------------------------------------ --------------------------
Repeats: 9 Average Length: 36 Average Length: 26

CRISPR 4 Range: 157550 - 157915
POSITION REPEAT SPACER
-------- ------------------------------------ ------------------------------
157550 GTTTTACTACCTTATAGATTTACACTATTCTCAAAC GAGGGGTTGTCCTTCATGTACTCTTTACCT [ 36, 30 ]
157748 GTTTTACTACCTTATAGATTTACACTATTCTCAAAC GGGCTTATACTCTGACTTTCAACAAGTTAG [ 36, 30 ]
157814 GTTTTACTACCTTATAGATTTACACTATTCTCAAAC CCGATTTTTTCATTGCCAAAACGATATTTT [ 36, 30 ]
157880 GTTTTACTACCTTATAGATTTACACTATTCTCAAAC
-------- ------------------------------------ ------------------------------
Repeats: 6 Average Length: 36 Average Length: 30

Time to find repeats: 22 ms


";
        let (_, contig) = parse_contig_arrays(input).unwrap();

        assert_eq!("MGYG000242676_4", contig.accession);
        assert_eq!(164254, contig.bp);
        assert_eq!(2, contig.arrays.len());
    }

    /// A report with four records and five arrays in total is parsed completely.
    #[test]
    fn test_parse() {
        let input = "Sequence 'MGYG000166779_38' (12280 bp)

CRISPR 1 Range: 10648 - 10814
POSITION REPEAT SPACER
-------- ----------------------------- ----------------------------------------
10648 CAAGTGCACCAACCAATCTCACCACCTCA GGGGGTGCACTTAAAGGGGGTGCACTTGTCTCAAGTGCACCAAGAA [ 29, 46 ]
10723 CAAGTGCACCAACCAATCTCACCACCTCA CCATCTCACCACCTCTCAGGGGGTGCAGTTGTCT [ 29, 34 ]
10786 CAAGTGCACCAACCAATCTCACCACCTCA
-------- ----------------------------- ----------------------------------------
Repeats: 3 Average Length: 29 Average Length: 40

Time to find repeats: 3 ms


Sequence 'MGYG000166779_43' (11302 bp)

CRISPR 2 Range: 4 - 1413
POSITION REPEAT SPACER
-------- ------------------------------------ -----------------------------
4 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC ACGGGTGCACTTTCGATGTCGCACTTTTTG [ 36, 30 ]
70 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC TATACATCATCGTACATATAAGCATACAG [ 36, 29 ]
135 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC GAAAAATCAGAGCCCAAAGTACGAGTAAC [ 36, 29 ]
200 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC CCAGTTCCCGAATTTGATGCTCTTGGCAT [ 36, 29 ]
265 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC ACTTACAACAACAACAATAACAATAAATG [ 36, 29 ]
330 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC ATACGTGTGCTCTATATACGCACCCATTGG [ 36, 30 ]
396 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC GGAGCTCTTTCGATGTCGCACTTTCTGAAG [ 36, 30 ]
462 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC CGTGCTCGCTTTGAATTTGTAGAACCCGA [ 36, 29 ]
527 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC TCTCGACACTATTTCTAACGAGGAAATTAA [ 36, 30 ]
593 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC GCGCTGAGAAGTTACCACCGACCGCTTGA [ 36, 29 ]
658 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC AATACTAAACCAAGATTGCCAAAGGTCCA [ 36, 29 ]
723 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC AGATGATCTACGCTCAATATTAGAAAAAC [ 36, 29 ]
788 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC GTATCTGCGGAACAAGTACAGAGAACATGA [ 36, 30 ]
854 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC CGAACCTAATACGGCTTTAGCCTTTTTGCA [ 36, 30 ]
920 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC AATGAGTACCAAAAGCAAAGAACAAATCGA [ 36, 30 ]
986 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC TATATTTTTGTGCGTTACCCGTCCGTGAGG [ 36, 30 ]
1052 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC TTACTTACGACTATTACGACCAGGTGAAC [ 36, 29 ]
1117 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC ATAATTATAATCGGAAATCAAGCGGATAA [ 36, 29 ]
1182 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC TTTGAATTAGATTCGGCAACCTTAGCATT [ 36, 29 ]
1247 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC TTTTCATCATATTCATAAGAATAGCGACC [ 36, 29 ]
1312 GTTGTGGTTTGATGTAGGAATCAAAAGATATACAAC CCATACGCTCCTTGGTGGTCTTGGTAAGGA [ 36, 30 ]
1378 GTTGTGGTTTGATGTAGAAATCAAAAGACATACAAC
-------- ------------------------------------ -----------------------------
Repeats: 22 Average Length: 36 Average Length: 29

Time to find repeats: 3 ms


Sequence 'MGYG000242676_4' (164254 bp)

CRISPR 3 Range: 60487 - 61025
POSITION REPEAT SPACER
-------- ------------------------------------ --------------------------
60487 TTTAATAACCCTATATAATTTCTACTATTGTAGATA TCTCCTTTGTAACTTCTTTGATTCGG [ 36, 26 ]
60549 TTTAATAACCCTATATAATTTCTACTGTCGTAGATA TTGTTCTTTTATATGTGTACATAGCTAGA [ 36, 29 ]
60614 TTTAATAACCCTATATAATTTCTACTATTGTAGATA ACCTCCTTTGGATTTTCAGCAAATCAGG [ 36, 28 ]
60678 TTTAATAACCCTATATAATTTCTACTATTTTAGATA ATACTGCTTGTTCTGTAAAAATTTTG [ 36, 26 ]
60740 TTTAATAACTCTATATAATTTCTACTATTGTAGATG GAGTTCTCCAACCGTTTGCGGCAATA [ 36, 26 ]
60802 TTTAATAACCCTATATAATTTCTACTATTGTAGATA ACGGTTGAATCAATGAGAAATGTTGTG [ 36, 27 ]
60865 TTTAATAACCCTATATAATTTCTACTATTGTAGATA TGATATTGACGGTGACCTGATTAACCG [ 36, 27 ]
60928 TTTAATAACCCTATATAATTTCTACTATTGTAGATA TGTCAATCACATCTGTGACCGCAAGG [ 36, 26 ]
60990 TTTAATAACCCTATATAATTTCTACTTTTTTGATTA
-------- ------------------------------------ --------------------------
Repeats: 9 Average Length: 36 Average Length: 26

CRISPR 4 Range: 157550 - 157915
POSITION REPEAT SPACER
-------- ------------------------------------ ------------------------------
157550 GTTTTACTACCTTATAGATTTACACTATTCTCAAAC GAGGGGTTGTCCTTCATGTACTCTTTACCT [ 36, 30 ]
157616 GTTTTACTACCTTATAGATTTACACTATTCTCAAAC ATACAAATGCATTGCCGAGGACAGTGTTTT [ 36, 30 ]
157682 GTTTTACTACCTTATAGATTTACACTATTCTCAAAC TATACGGTTTGCCCGTGCAGTCTTGTACAA [ 36, 30 ]
157748 GTTTTACTACCTTATAGATTTACACTATTCTCAAAC GGGCTTATACTCTGACTTTCAACAAGTTAG [ 36, 30 ]
157814 GTTTTACTACCTTATAGATTTACACTATTCTCAAAC CCGATTTTTTCATTGCCAAAACGATATTTT [ 36, 30 ]
157880 GTTTTACTACCTTATAGATTTACACTATTCTCAAAC
-------- ------------------------------------ ------------------------------
Repeats: 6 Average Length: 36 Average Length: 30

Time to find repeats: 22 ms


Sequence 'MGYG000273829_14' (62198 bp)

CRISPR 5 Range: 15191 - 17205
POSITION REPEAT SPACER
-------- ------------------------------------ -----------------------------
15191 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT ATCGCTGAACCTACAACAGACGCAAGAACA [ 36, 30 ]
15257 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT GATATTGTCATACCTAAGTAAATAGGTGCG [ 36, 30 ]
15323 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT TACAATCAGTCTATAACATTTGCAACTACG [ 36, 30 ]
15389 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT TATTATAGACAGCAAGCAACTTGATGTAT [ 36, 29 ]
15454 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT CTTGAATTTGGGGAGATGTTCTCAGCTGGT [ 36, 30 ]
15520 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT AAAGTTTGCTGACAGGGACATTCAAAGCCG [ 36, 30 ]
15586 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT AAACCTGTCTGTCCGATCTGCACCATATAT [ 36, 30 ]
15652 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT ACCGCTATTGCGCTGCAGCATCCACAAGGA [ 36, 30 ]
15718 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT GCATCTTCCTGCGCTCTCTCTGAAAACATG [ 36, 30 ]
15784 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT CGAAGCCTAAAGCTCATTTCGCTTAGGCTT [ 36, 30 ]
15850 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT AAACATGGTGTTGATGTCAAAGAGCTGTAT [ 36, 30 ]
15916 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT TGGGCGAATATAAATTCCATCGGTGGCAAG [ 36, 30 ]
15982 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT AATATTGGATGATGTGTATGGCATTTTACT [ 36, 30 ]
16048 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT AGTGTATATGTGAACCCTGCTCCCAGTGCT [ 36, 30 ]
16114 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT AAAGACCGGAGCAAAGATGTCCGGGAGCCG [ 36, 30 ]
16180 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT TGAAAGTGGTGTAATTGTTATAACTCATTG [ 36, 30 ]
16246 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT AGACAACAGGTGTGGAAGCATATGTCTTTA [ 36, 30 ]
16312 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT TGCTGCATAGGTGTGTATTTTCTCATGTCG [ 36, 30 ]
16378 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT GGTAATGGTGGTGGCGGTTATACCGCAACT [ 36, 30 ]
16444 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT ATGGTCGGGGCTACATATTACGCCGCAGTA [ 36, 30 ]
16510 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT CGTGAGGTCTCCGACCGTGAAAACAGTTCT [ 36, 30 ]
16576 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT ACGAACTTAGTACCCTTTTCTGGGCGGCAT [ 36, 30 ]
16642 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT CCGCAGGTGCTACCGCTGTTATACTCTGTT [ 36, 30 ]
16708 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT CGTAAATCGTTGGCGAAACGCTACCAACTG [ 36, 30 ]
16774 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT CCTCGGTCTGCTCTAACAGATCCCCCAAGT [ 36, 30 ]
16840 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT ACAGAGAAAGAAAGAGAGATTAACGACTAC [ 36, 30 ]
16906 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT TGAAACGGAGTGGACAGGTAAAGGAATGGG [ 36, 30 ]
16972 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT TGCGGTCCCTTGGTTCCGTCAACAACATCA [ 36, 30 ]
17038 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT TGTCCTATTCCCTTTTATGCTGCGTGTATA [ 36, 30 ]
17104 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT AATACAAGCATAAAGAACGAACCGCAACGG [ 36, 30 ]
17170 GCTGTAGTTCCCGGTTATTACTTGGTATGTTATAAT
-------- ------------------------------------ -----------------------------
Repeats: 31 Average Length: 36 Average Length: 29

Time to find repeats: 9 ms


";
        let contigs = parse(input).unwrap();
        let array_count = contigs
            .iter()
            .fold(0usize, |total, contig| total + contig.arrays.len());

        assert_eq!(4, contigs.len());
        assert_eq!(5, array_count);
    }
}