1use ahash::AHashSet;
2use aho_corasick::AhoCorasick;
3use bio::data_structures::bitenc::BitEnc;
4use bitvec::prelude::*;
5use seq_io::fastq::RefRecord;
6use std::ops::Range;
7
8use crate::AMINO_ACIDS;
9use crate::DNA_BASES;
10use crate::DNA_MASK_VALUES;
11use crate::color::{COLOR_BACKGROUND, COLOR_BASES, COLOR_QUALS};
12use crate::color::{color_background, color_head};
13use crate::encode;
14use crate::reverse_complement;
15use anyhow::{Context, Result, bail};
16use bstr::ByteSlice;
17use regex::bytes::{Regex, RegexBuilder, RegexSet, RegexSetBuilder};
18use seq_io::fastq::{OwnedRecord, Record};
19
20#[derive(Copy, Clone, Debug)]
22pub struct MatcherOpts {
23 pub invert_match: bool,
25 pub reverse_complement: bool,
28}
29
30fn to_bitvec(ranges: impl Iterator<Item = Range<usize>>, len: usize) -> BitVec {
32 let mut vec = bitvec![0; len];
33 ranges.for_each(|range| {
34 for index in range {
35 vec.set(index, true);
36 }
37 });
38 vec
39}
40
41fn bases_colored(
44 bases: &[u8],
45 quals: &[u8],
46 ranges: impl Iterator<Item = Range<usize>>,
47) -> (Vec<u8>, Vec<u8>) {
48 let mut colored_bases = Vec::with_capacity(bases.len());
50 let mut colored_quals = Vec::with_capacity(bases.len());
51
52 let bits = to_bitvec(ranges, bases.len());
54
55 let mut last_color_on = false;
58 let mut last_bases_index = 0;
59 let mut cur_bases_index = 0;
60 for base_color_on in bits.iter() {
61 if *base_color_on {
62 if !last_color_on {
64 if last_bases_index < cur_bases_index {
66 COLOR_BACKGROUND
67 .paint(&bases[last_bases_index..cur_bases_index])
68 .write_to(&mut colored_bases)
69 .unwrap();
70 COLOR_BACKGROUND
71 .paint(&quals[last_bases_index..cur_bases_index])
72 .write_to(&mut colored_quals)
73 .unwrap();
74 }
75 last_bases_index = cur_bases_index;
77 }
78
79 last_color_on = true;
80 } else {
81 if last_color_on {
83 if last_bases_index < cur_bases_index {
85 COLOR_BASES
86 .paint(&bases[last_bases_index..cur_bases_index])
87 .write_to(&mut colored_bases)
88 .unwrap();
89 COLOR_QUALS
90 .paint(&quals[last_bases_index..cur_bases_index])
91 .write_to(&mut colored_quals)
92 .unwrap();
93 }
94 last_bases_index = cur_bases_index;
96 }
97 last_color_on = false;
98 }
99 cur_bases_index += 1;
100 }
101 if last_bases_index < cur_bases_index {
103 if last_color_on {
104 COLOR_BASES
105 .paint(&bases[last_bases_index..cur_bases_index])
106 .write_to(&mut colored_bases)
107 .unwrap();
108 COLOR_QUALS
109 .paint(&quals[last_bases_index..cur_bases_index])
110 .write_to(&mut colored_quals)
111 .unwrap();
112 } else {
113 COLOR_BACKGROUND
114 .paint(&bases[last_bases_index..cur_bases_index])
115 .write_to(&mut colored_bases)
116 .unwrap();
117 COLOR_BACKGROUND
118 .paint(&quals[last_bases_index..cur_bases_index])
119 .write_to(&mut colored_quals)
120 .unwrap();
121 }
122 }
123
124 (colored_bases, colored_quals)
125}
126
127pub fn validate_fixed_pattern(pattern: &str, protein: bool) -> Result<()> {
129 let valid_chars = if protein {
130 &AMINO_ACIDS[..]
131 } else {
132 &DNA_BASES[..]
133 };
134 let kind = if protein { "amino acids" } else { "DNA bases" };
135 for (index, base) in pattern.char_indices() {
136 if !base.is_ascii() || !valid_chars.contains(&(base as u8)) {
137 let next_index = index + base.len_utf8();
138 bail!(
139 "Fixed pattern must contain only {kind}: {} .. [{}] .. {}",
140 &pattern[0..index],
141 &pattern[index..next_index],
142 &pattern[next_index..],
143 )
144 }
145 }
146 Ok(())
147}
148
149pub trait Matcher {
151 fn opts(&self) -> MatcherOpts;
153
154 fn bases_match(&self, bases: &[u8]) -> bool;
156
157 fn color_matched_bases(&self, bases: &[u8], quals: &[u8]) -> (Vec<u8>, Vec<u8>);
161
162 #[inline]
164 fn read_match(&self, read: &RefRecord) -> bool {
165 let bases_match = self.bases_match(read.seq());
166 if self.opts().invert_match {
167 bases_match
168 && (!self.opts().reverse_complement
169 || self.bases_match(&reverse_complement(read.seq())))
170 } else {
171 bases_match
172 || (self.opts().reverse_complement
173 && self.bases_match(&reverse_complement(read.seq())))
174 }
175 }
176
177 #[inline]
180 fn color(&self, read: &mut OwnedRecord, match_found: bool) {
181 if match_found {
182 let (seq, qual) = self.color_matched_bases(&read.seq, &read.qual);
183 read.head = color_head(&read.head);
184 read.seq = seq;
185 read.qual = qual;
186 } else {
187 read.head = color_background(&read.head);
189 read.seq = color_background(&read.seq);
190 read.qual = color_background(&read.qual);
191 }
192 }
193}
194
195pub struct FixedStringMatcher {
197 pattern: Vec<u8>,
198 opts: MatcherOpts,
199}
200
201impl Matcher for FixedStringMatcher {
202 #[inline]
203 fn bases_match(&self, bases: &[u8]) -> bool {
204 bases.find(&self.pattern).is_some() != self.opts.invert_match
205 }
206
207 fn color_matched_bases(&self, bases: &[u8], quals: &[u8]) -> (Vec<u8>, Vec<u8>) {
208 let ranges = bases.find_iter(&self.pattern).map(|start| Range {
209 start,
210 end: start + self.pattern.len(),
211 });
212 if self.opts().reverse_complement {
213 let bases_revcomp = &reverse_complement(bases);
214 let ranges_revcomp = bases_revcomp
215 .find_iter(&self.pattern)
216 .map(|start| bases.len() - start - self.pattern.len())
217 .map(|start| Range {
218 start,
219 end: start + self.pattern.len(),
220 });
221 bases_colored(bases, quals, ranges.chain(ranges_revcomp))
222 } else {
223 bases_colored(bases, quals, ranges)
224 }
225 }
226
227 #[inline]
228 fn opts(&self) -> MatcherOpts {
229 self.opts
230 }
231}
232
233impl FixedStringMatcher {
234 pub fn new(pattern: &str, opts: MatcherOpts) -> Self {
235 let pattern = pattern.as_bytes().to_vec();
236 Self { pattern, opts }
237 }
238}
239
240pub struct FixedStringSetMatcher {
242 patterns: Vec<Vec<u8>>,
243 aho_corasick: AhoCorasick,
244 opts: MatcherOpts,
245}
246
247impl Matcher for FixedStringSetMatcher {
248 #[inline]
249 fn bases_match(&self, bases: &[u8]) -> bool {
250 self.aho_corasick.is_match(bases) != self.opts.invert_match
251 }
252
253 fn color_matched_bases(&self, bases: &[u8], quals: &[u8]) -> (Vec<u8>, Vec<u8>) {
254 let ranges = self.patterns.iter().flat_map(|pattern| {
255 bases
256 .find_iter(&pattern)
257 .map(|start| Range {
258 start,
259 end: start + pattern.len(),
260 })
261 .collect::<Vec<_>>()
262 });
263 if self.opts().reverse_complement {
264 let bases_revcomp = &reverse_complement(bases);
265 let ranges_revcomp = self.patterns.iter().flat_map(|pattern| {
266 bases_revcomp
267 .find_iter(&pattern)
268 .map(|start| bases.len() - start - pattern.len())
269 .map(|start| Range {
270 start,
271 end: start + pattern.len(),
272 })
273 .collect::<Vec<_>>()
274 });
275 bases_colored(bases, quals, ranges.chain(ranges_revcomp))
276 } else {
277 bases_colored(bases, quals, ranges)
278 }
279 }
280
281 #[inline]
282 fn opts(&self) -> MatcherOpts {
283 self.opts
284 }
285}
286
287impl FixedStringSetMatcher {
288 pub fn new<I, S>(patterns: I, opts: MatcherOpts) -> Result<Self>
289 where
290 S: AsRef<str>,
291 I: IntoIterator<Item = S>,
292 {
293 let patterns: Vec<Vec<u8>> = patterns
294 .into_iter()
295 .map(|pattern| pattern.as_ref().to_owned().as_bytes().to_vec())
296 .collect();
297 let aho_corasick =
298 AhoCorasick::new(&patterns).context("Failed to build Aho-Corasick automaton")?;
299 Ok(Self {
300 patterns,
301 aho_corasick,
302 opts,
303 })
304 }
305}
306
307fn bitenc_match_offsets(
311 bases: &BitEnc,
312 needle: &BitEnc,
313 has_iupac: bool,
314 mut on_match: impl FnMut(usize) -> bool,
315) {
316 if bases.nr_symbols() < needle.nr_symbols() {
317 return;
318 }
319
320 let mut bases_hash: usize = 0;
321 let needle_hash = if has_iupac {
322 0
323 } else {
324 (0..needle.nr_symbols())
325 .map(|i| needle.get(i).unwrap() as usize)
326 .sum()
327 };
328
329 'outer: for offset in 0..=bases.nr_symbols() - needle.nr_symbols() {
330 if !has_iupac {
331 if offset == 0 {
332 bases_hash = (0..needle.nr_symbols())
333 .map(|i| bases.get(i).unwrap() as usize)
334 .sum();
335 } else {
336 bases_hash += bases.get(offset + needle.nr_symbols() - 1).unwrap() as usize;
337 bases_hash -= bases.get(offset - 1).unwrap() as usize;
338 }
339 if bases_hash != needle_hash {
340 continue 'outer;
341 }
342 }
343 for i in 0..needle.nr_symbols() {
344 let base = bases.get(offset + i).unwrap();
345 if needle.get(i).unwrap() & base != base {
346 continue 'outer;
347 }
348 }
349 if !on_match(offset) {
350 return;
351 }
352 }
353}
354
355fn bitenc_find_all(bases: &BitEnc, needle: &BitEnc, has_iupac: bool) -> Vec<Range<usize>> {
358 let mut ranges: Vec<Range<usize>> = Vec::new();
359 let needle_len = needle.nr_symbols();
360 bitenc_match_offsets(bases, needle, has_iupac, |offset| {
361 ranges.push(offset..offset + needle_len);
362 true
363 });
364 ranges
365}
366
367fn bitenc_find(bases: &BitEnc, needle: &BitEnc, has_iupac: bool) -> bool {
370 let mut found = false;
371 bitenc_match_offsets(bases, needle, has_iupac, |_| {
372 found = true;
373 false });
375 found
376}
377
378fn bitenc_has_iupac(bitenc: &BitEnc) -> bool {
381 !bitenc.iter().all(|value| DNA_MASK_VALUES.contains(&value))
382}
383
384pub struct BitMaskMatcher {
386 bitenc: BitEnc,
387 has_iupac: bool,
388 opts: MatcherOpts,
389}
390
391impl Matcher for BitMaskMatcher {
392 #[inline]
393 fn bases_match(&self, bases: &[u8]) -> bool {
394 let bases = encode(bases);
395 bitenc_find(&bases, &self.bitenc, self.has_iupac) != self.opts.invert_match
396 }
397
398 fn color_matched_bases(&self, bases: &[u8], quals: &[u8]) -> (Vec<u8>, Vec<u8>) {
399 let encoded_bases = encode(bases);
400 let ranges = bitenc_find_all(&encoded_bases, &self.bitenc, self.has_iupac);
401 if self.opts().reverse_complement {
402 let bases_revcomp = &reverse_complement(bases);
403 let encoded_revcomp = encode(bases_revcomp);
404 let ranges_revcomp = bitenc_find_all(&encoded_revcomp, &self.bitenc, self.has_iupac)
405 .into_iter()
406 .map(|range| Range {
407 start: bases.len() - range.start - range.len(),
408 end: bases.len() - range.start,
409 });
410 bases_colored(bases, quals, ranges.into_iter().chain(ranges_revcomp))
411 } else {
412 bases_colored(bases, quals, ranges.into_iter())
413 }
414 }
415
416 #[inline]
417 fn opts(&self) -> MatcherOpts {
418 self.opts
419 }
420}
421
422impl BitMaskMatcher {
423 pub fn new(bitenc: BitEnc, opts: MatcherOpts) -> Self {
424 let has_iupac = bitenc_has_iupac(&bitenc);
425 Self {
426 bitenc,
427 has_iupac,
428 opts,
429 }
430 }
431}
432
433pub struct BitMaskSetMatcher {
435 bitencs: Vec<BitEnc>,
436 has_iupac: Vec<bool>,
437 opts: MatcherOpts,
438}
439
440impl Matcher for BitMaskSetMatcher {
441 fn bases_match(&self, bases: &[u8]) -> bool {
442 let bases = encode(bases);
443 self.bitencs
444 .iter()
445 .enumerate()
446 .any(|(index, needle)| bitenc_find(&bases, needle, self.has_iupac[index]))
447 != self.opts.invert_match
448 }
449
450 fn color_matched_bases(&self, bases: &[u8], quals: &[u8]) -> (Vec<u8>, Vec<u8>) {
451 let encoded_bases = encode(bases);
452 let ranges = self.bitencs.iter().enumerate().flat_map(|(index, needle)| {
453 bitenc_find_all(&encoded_bases, needle, self.has_iupac[index])
454 });
455 if self.opts().reverse_complement {
456 let bases_revcomp = &reverse_complement(bases);
457 let encoded_revcomp = encode(bases_revcomp);
458 let ranges_revcomp = self.bitencs.iter().enumerate().flat_map(|(index, needle)| {
459 bitenc_find_all(&encoded_revcomp, needle, self.has_iupac[index])
460 .into_iter()
461 .map(|range| Range {
462 start: bases.len() - range.start - range.len(),
463 end: bases.len() - range.start,
464 })
465 });
466 bases_colored(bases, quals, ranges.chain(ranges_revcomp))
467 } else {
468 bases_colored(bases, quals, ranges)
469 }
470 }
471
472 #[inline]
473 fn opts(&self) -> MatcherOpts {
474 self.opts
475 }
476}
477
478impl BitMaskSetMatcher {
479 pub fn new(bitencs: Vec<BitEnc>, opts: MatcherOpts) -> Self {
480 let has_iupac = bitencs.iter().map(bitenc_has_iupac).collect();
481 Self {
482 bitencs,
483 has_iupac,
484 opts,
485 }
486 }
487}
488
489pub struct RegexMatcher {
491 regex: Regex,
492 opts: MatcherOpts,
493}
494
495impl RegexMatcher {
496 pub fn new(pattern: &str, opts: MatcherOpts) -> Result<Self> {
497 let regex = RegexBuilder::new(pattern)
498 .build()
499 .with_context(|| format!("Invalid regular expression: '{}'", pattern))?;
500 Ok(Self { regex, opts })
501 }
502}
503
504impl Matcher for RegexMatcher {
505 #[inline]
506 fn bases_match(&self, bases: &[u8]) -> bool {
507 self.regex.is_match(bases) != self.opts.invert_match
508 }
509
510 fn color_matched_bases(&self, bases: &[u8], quals: &[u8]) -> (Vec<u8>, Vec<u8>) {
511 let ranges = self.regex.find_iter(bases).map(|m| m.range());
512 if self.opts().reverse_complement {
513 let bases_revcomp = &reverse_complement(bases);
514 let ranges_revcomp =
515 self.regex
516 .find_iter(bases_revcomp)
517 .map(|m| m.range())
518 .map(|range| Range {
519 start: bases.len() - range.start - range.len(),
520 end: bases.len() - range.start,
521 });
522 bases_colored(bases, quals, ranges.chain(ranges_revcomp))
523 } else {
524 bases_colored(bases, quals, ranges)
525 }
526 }
527
528 #[inline]
529 fn opts(&self) -> MatcherOpts {
530 self.opts
531 }
532}
533
534pub struct RegexSetMatcher {
535 regex_set: RegexSet,
536 regex_matchers: Vec<RegexMatcher>,
537 opts: MatcherOpts,
538}
539
540impl RegexSetMatcher {
542 pub fn new<I, S>(patterns: I, opts: MatcherOpts) -> Result<Self>
543 where
544 S: AsRef<str>,
545 I: IntoIterator<Item = S>,
546 {
547 let string_patterns: Vec<String> = patterns
548 .into_iter()
549 .map(|p| p.as_ref().to_string())
550 .collect();
551 let regex_set = RegexSetBuilder::new(string_patterns.clone())
552 .dfa_size_limit(usize::MAX)
553 .build()
554 .context("Failed to build regex set from patterns")?;
555 let regex_matchers: Vec<RegexMatcher> = string_patterns
556 .into_iter()
557 .map(|pattern| RegexMatcher::new(pattern.as_ref(), opts))
558 .collect::<Result<Vec<_>>>()?;
559 Ok(Self {
560 regex_set,
561 regex_matchers,
562 opts,
563 })
564 }
565}
566
567impl Matcher for RegexSetMatcher {
568 #[inline]
569 fn bases_match(&self, bases: &[u8]) -> bool {
570 self.regex_set.is_match(bases) != self.opts.invert_match
571 }
572
573 fn color_matched_bases(&self, bases: &[u8], quals: &[u8]) -> (Vec<u8>, Vec<u8>) {
574 let ranges = self
575 .regex_matchers
576 .iter()
577 .flat_map(|r| r.regex.find_iter(bases).map(|m| m.range()));
578 if self.opts().reverse_complement {
579 let bases_revcomp = &reverse_complement(bases);
580 let ranges_revcomp = self.regex_matchers.iter().flat_map(|r| {
581 r.regex
582 .find_iter(bases_revcomp)
583 .map(|m| m.range())
584 .map(|range| Range {
585 start: bases.len() - range.start - range.len(),
586 end: bases.len() - range.start,
587 })
588 });
589 bases_colored(bases, quals, ranges.chain(ranges_revcomp))
590 } else {
591 bases_colored(bases, quals, ranges)
592 }
593 }
594
595 #[inline]
596 fn opts(&self) -> MatcherOpts {
597 self.opts
598 }
599}
600
601pub struct QueryNameMatcher {
604 query_names: AHashSet<Vec<u8>>,
605 opts: MatcherOpts,
606}
607
608impl QueryNameMatcher {
609 pub fn new(query_names: AHashSet<Vec<u8>>, opts: MatcherOpts) -> Self {
610 Self { query_names, opts }
611 }
612}
613
614impl Matcher for QueryNameMatcher {
615 #[inline]
617 fn bases_match(&self, _bases: &[u8]) -> bool {
618 !self.opts.invert_match
619 }
620
621 fn color_matched_bases(&self, bases: &[u8], quals: &[u8]) -> (Vec<u8>, Vec<u8>) {
622 (bases.to_vec(), quals.to_vec())
623 }
624
625 #[inline]
626 fn opts(&self) -> MatcherOpts {
627 self.opts
628 }
629
630 #[inline]
631 fn read_match(&self, read: &RefRecord) -> bool {
632 let id_match = self.query_names.contains(read.id_bytes());
633 if self.opts.invert_match {
634 !id_match
635 } else {
636 id_match
637 }
638 }
639}
640
641pub struct MatcherFactory;
643
644impl MatcherFactory {
645 pub fn new_matcher(
646 pattern: &Option<String>,
647 fixed_strings: bool,
648 regexp: &Vec<String>,
649 bitencs: Option<Vec<BitEnc>>,
650 match_opts: MatcherOpts,
651 ) -> Result<Box<dyn Matcher + Sync + Send>> {
652 let use_bitmask = bitencs.is_some();
653 let is_single = pattern.is_some();
654 match (use_bitmask, is_single, fixed_strings) {
655 (true, true, _) => {
656 let bitencs = bitencs.unwrap();
657 Ok(Box::new(BitMaskMatcher::new(
658 bitencs.into_iter().next().unwrap(),
659 match_opts,
660 )))
661 }
662 (true, false, _) => Ok(Box::new(BitMaskSetMatcher::new(
663 bitencs.unwrap(),
664 match_opts,
665 ))),
666 (false, true, true) => Ok(Box::new(FixedStringMatcher::new(
667 pattern.as_ref().unwrap(),
668 match_opts,
669 ))),
670 (false, true, false) => Ok(Box::new(RegexMatcher::new(
671 pattern.as_ref().unwrap(),
672 match_opts,
673 )?)),
674 (false, false, true) => Ok(Box::new(FixedStringSetMatcher::new(regexp, match_opts)?)),
675 (false, false, false) => Ok(Box::new(RegexSetMatcher::new(regexp, match_opts)?)),
676 }
677 }
678
679 pub fn new_query_name_matcher(
681 query_names: AHashSet<Vec<u8>>,
682 match_opts: MatcherOpts,
683 ) -> Box<dyn Matcher + Sync + Send> {
684 Box::new(QueryNameMatcher::new(query_names, match_opts))
685 }
686}
687
688#[cfg(test)]
690pub mod tests {
691 use crate::encode;
692 use crate::matcher::*;
693 use ahash::AHashSet;
694 use rstest::rstest;
695
696 #[rstest]
701 #[case(vec![(0, 1)], "AGG", bitvec![1, 0, 0])] #[case(vec![(2, 3)], "AGG", bitvec![0, 0, 1])] #[case(vec![(1, 2)], "AGG", bitvec![0, 1, 0])] #[case(vec![(0 ,0)], "AGG", bitvec![0, 0, 0])] #[case(vec![(0, 3)], "AGG", bitvec![1, 1, 1])] #[case(vec![(1, 4)], "AGGTC", bitvec![0, 1, 1, 1, 0])] #[case(vec![(0, 2), (3, 5)], "AGGTC", bitvec![1, 1, 0, 1, 1])] #[case(vec![(0, 3), (3, 5)], "AGGTC", bitvec![1, 1, 1, 1, 1])] #[case(vec![(0, 4), (3, 5)], "AGGTC", bitvec![1, 1, 1, 1, 1])] #[case(vec![(0, 3), (0, 5)], "AGGTC", bitvec![1, 1, 1, 1, 1])] #[case(vec![(4, 5), (0, 2)], "AGGTC", bitvec![1, 1, 0, 0, 1])] fn test_to_bitvec(
713 #[case] ranges: Vec<(usize, usize)>,
714 #[case] bases: &str,
715 #[case] expected: BitVec,
716 ) {
717 let ranges = ranges
718 .into_iter()
719 .map(|(start, end)| std::ops::Range { start, end });
720 let result_bitvec = to_bitvec(ranges, bases.len());
721 assert_eq!(result_bitvec, expected);
722 }
723
724 #[test]
732 fn test_single_base_match_colored() {
733 let bases = b"ACGT";
734 let quals = b"IIII";
735
736 let ranges_start = vec![Range { start: 0, end: 1 }];
738 let (colored_bases, _) = bases_colored(bases, quals, ranges_start.into_iter());
739 assert!(
740 !colored_bases.is_empty(),
741 "single-base match at start must produce colored output"
742 );
743
744 let ranges_mid = vec![Range { start: 2, end: 3 }];
746 let (colored_bases, _) = bases_colored(bases, quals, ranges_mid.into_iter());
747 assert!(
748 !colored_bases.is_empty(),
749 "single-base match in middle must produce colored output"
750 );
751
752 let ranges_end = vec![Range { start: 3, end: 4 }];
754 let (colored_bases, _) = bases_colored(bases, quals, ranges_end.into_iter());
755 assert!(
756 !colored_bases.is_empty(),
757 "single-base match at end must produce colored output"
758 );
759
760 let ranges = vec![Range { start: 2, end: 3 }];
764 let (colored_bases, colored_quals) = bases_colored(bases, quals, ranges.into_iter());
765 let colored_str = String::from_utf8_lossy(&colored_bases);
766 let colored_qual_str = String::from_utf8_lossy(&colored_quals);
767 assert!(
768 colored_str.contains("AC"),
769 "uncolored prefix must be present"
770 );
771 assert!(colored_str.contains("G"), "colored base must be present");
772 assert!(
773 colored_str.contains("T"),
774 "uncolored suffix must be present"
775 );
776 assert!(
777 colored_qual_str.contains("II"),
778 "uncolored qual prefix must be present"
779 );
780 }
781
782 #[rstest]
787 #[case(false, "AG", "AGG", true)] #[case(false, "CC", "AGG", false)] #[case(true, "CC", "AGG", true)] #[case(true, "TT", "AGG", false)] #[case(false, "AT", "ATGAT", true)] #[case(true, "CG", "GCCG", true)] #[case(false, "AGAG", "AGAGAGAG", true)] #[case(true, "TCTC", "AGAGAGAG", true)] fn test_fixed_string_matcher_read_match(
796 #[case] reverse_complement: bool,
797 #[case] pattern: &str,
798 #[case] seq: &str,
799 #[case] expected: bool,
800 ) {
801 let invert_matches = [true, false];
802 for invert_match in IntoIterator::into_iter(invert_matches) {
803 let opts = MatcherOpts {
804 invert_match,
805 reverse_complement,
806 };
807 let matcher = FixedStringMatcher::new(pattern, opts);
808 let qual = (0..seq.len()).map(|_| "X").collect::<String>();
809 let record = format!("@id\n{seq}\n+\n{qual}\n");
810 let mut reader = seq_io::fastq::Reader::new(record.as_bytes());
811 let read_record = reader.next().unwrap().unwrap();
812 let result = matcher.read_match(&read_record);
813 if invert_match {
814 assert_ne!(result, expected);
815 } else {
816 assert_eq!(result, expected);
817 }
818 }
819 }
820
821 #[rstest]
826 #[case(false, vec!["A", "AGG", "G"], "AGGG", true)] #[case(true, vec!["A", "AGG", "G"], "TCCC", true)] #[case(false, vec!["A", "AGG", "G"], "TTTT", false)] #[case(true, vec!["T", "AAA"], "CCCCC", false)] #[case(false, vec!["AGG", "C", "TT"], "AGGTT", true)] #[case(true, vec!["AGG", "C", "TT"], "GGGGG", true)] #[case(false, vec!["AC", "TT"], "TTACGTT", true)] #[case(true, vec!["GT", "AA"], "TTACGTT", true)] #[case(false, vec!["GAGA","AGTT"], "GAGAGTT", true)] #[case(true, vec!["CTCT","AACT"], "GAGAGTT", true)] fn test_fixed_string_set_metcher_read_match(
837 #[case] reverse_complement: bool,
838 #[case] patterns: Vec<&str>,
839 #[case] seq: &str,
840 #[case] expected: bool,
841 ) {
842 let invert_matches = [true, false];
843 for invert_match in IntoIterator::into_iter(invert_matches) {
844 let opts = MatcherOpts {
845 invert_match,
846 reverse_complement,
847 };
848 let matcher = FixedStringSetMatcher::new(patterns.iter(), opts).unwrap();
849 let qual = (0..seq.len()).map(|_| "X").collect::<String>();
850 let record = format!("@id\n{seq}\n+\n{qual}\n");
851 let mut reader = seq_io::fastq::Reader::new(record.as_bytes());
852 let read_record = reader.next().unwrap().unwrap();
853 let result = matcher.read_match(&read_record);
854 if invert_match {
855 assert_ne!(result, expected);
856 } else {
857 assert_eq!(result, expected);
858 }
859 }
860 }
861
862 #[rstest]
867 #[case(false, "^A", "AGG", true)] #[case(false, "^T", "AGG", false)] #[case(true, "^C", "AGG", true)] #[case(true, "^T", "AGG", false)] #[case(false, "A.A", "ATATA", true)] #[case(true, "T.G", "CACACA", false)] fn test_regex_matcher_read_match(
874 #[case] reverse_complement: bool,
875 #[case] pattern: &str,
876 #[case] seq: &str,
877 #[case] expected: bool,
878 ) {
879 let invert_matches = [true, false];
880 for invert_match in IntoIterator::into_iter(invert_matches) {
881 let opts = MatcherOpts {
882 invert_match,
883 reverse_complement,
884 };
885
886 let matcher = RegexMatcher::new(pattern, opts).unwrap();
887 let qual = (0..seq.len()).map(|_| "X").collect::<String>();
888 let record = format!("@id\n{seq}\n+\n{qual}\n");
889 let mut reader = seq_io::fastq::Reader::new(record.as_bytes());
890 let read_record = reader.next().unwrap().unwrap();
891 let result = matcher.read_match(&read_record);
892 if invert_match {
893 assert_ne!(result, expected);
894 } else {
895 assert_eq!(result, expected);
896 }
897 }
898 }
899
900 #[rstest]
905 #[case(false, vec!["^A.G", "C..", "$T"], "AGGCTT", true)] #[case(true, vec!["^T.C", "..G", "$A"], "AGGCTT", true)] #[case(false, vec!["^A.G", "G..", "$T"], "CCTCA", false)] #[case(true, vec!["$A", "C.CC"], "CCTCA", false)] #[case(false, vec!["^T", ".GG", "A.+G"], "ATCTACTACG", true)] #[case(true, vec!["^C", ".CC", "C+.T"], "ATCTACTACG", true)] #[case(false, vec!["^T", "T.A"], "TTAATAA", true)] #[case(true, vec!["^T", "T.A"], "AATA", true)] #[case(false, vec!["^T","T.+G"], "TAGAGTG", true)] #[case(true, vec!["^A","A.+C"], "TAGAGTG", true)] fn test_regex_set_metcher_read_match(
916 #[case] reverse_complement: bool,
917 #[case] patterns: Vec<&str>,
918 #[case] seq: &str,
919 #[case] expected: bool,
920 ) {
921 let invert_matches = [true, false];
922 for invert_match in IntoIterator::into_iter(invert_matches) {
923 let opts = MatcherOpts {
924 invert_match,
925 reverse_complement,
926 };
927
928 let matcher = RegexSetMatcher::new(patterns.iter(), opts).unwrap();
929 let qual = (0..seq.len()).map(|_| "X").collect::<String>();
930 let record = format!("@id\n{seq}\n+\n{qual}\n");
931 let mut reader = seq_io::fastq::Reader::new(record.as_bytes());
932 let read_record = reader.next().unwrap().unwrap();
933 let result = matcher.read_match(&read_record);
934 if invert_match {
935 assert_ne!(result, expected);
936 } else {
937 assert_eq!(result, expected);
938 }
939 }
940 }
941
942 #[rstest]
947 #[case(false, vec!["read1", "read2"], "read1", true)] #[case(false, vec!["read1", "read2"], "read3", false)] #[case(false, vec!["read1"], "read1_extra", false)] #[case(false, vec!["read1_extra"], "read1", false)] #[case(true, vec!["read1", "read2"], "read1", false)] #[case(true, vec!["read1", "read2"], "read3", true)] fn test_query_name_matcher_read_match(
954 #[case] invert_match: bool,
955 #[case] query_names: Vec<&str>,
956 #[case] read_id: &str,
957 #[case] expected: bool,
958 ) {
959 let opts = MatcherOpts {
960 invert_match,
961 reverse_complement: false,
962 };
963 let names: AHashSet<Vec<u8>> = query_names.iter().map(|s| s.as_bytes().to_vec()).collect();
964 let matcher = QueryNameMatcher::new(names, opts);
965 let seq = "ACGT";
966 let qual = "XXXX";
967 let record = format!("@{read_id} description\n{seq}\n+\n{qual}\n");
968 let mut reader = seq_io::fastq::Reader::new(record.as_bytes());
969 let read_record = reader.next().unwrap().unwrap();
970 assert_eq!(matcher.read_match(&read_record), expected);
971 }
972
973 #[test]
974 fn test_query_name_matcher_with_header_description() {
975 let opts = MatcherOpts {
977 invert_match: false,
978 reverse_complement: false,
979 };
980 let names: AHashSet<Vec<u8>> = vec!["SRR001666.1".as_bytes().to_vec()]
981 .into_iter()
982 .collect();
983 let matcher = QueryNameMatcher::new(names, opts);
984
985 let record = "@SRR001666.1 071112_SLXA description\nACGT\n+\nXXXX\n";
987 let mut reader = seq_io::fastq::Reader::new(record.as_bytes());
988 let read_record = reader.next().unwrap().unwrap();
989 assert!(matcher.read_match(&read_record));
990 }
991
992 #[rstest]
997 #[case("AGTGTGATG", false)]
998 #[case("QRNQRNQRN", true)]
999 #[case("ARNDCEQGHILKMFPSTWYV", true)] #[case("BJOUXZ", true)] fn test_validate_fixed_pattern_is_ok(#[case] pattern: &str, #[case] protein: bool) {
1002 let result = validate_fixed_pattern(pattern, protein);
1003 assert!(result.is_ok());
1004 }
1005
1006 #[rstest]
1007 #[case(
1008 "AXGTGTGATG",
1009 false,
1010 "Fixed pattern must contain only DNA bases: A .. [X] .. GTGTGATG"
1011 )]
1012 #[case(
1013 "QRN1QRN",
1014 true,
1015 "Fixed pattern must contain only amino acids: QRN .. [1] .. QRN"
1016 )]
1017 fn test_validate_fixed_pattern_error(
1018 #[case] pattern: &str,
1019 #[case] protein: bool,
1020 #[case] msg: &str,
1021 ) {
1022 let result = validate_fixed_pattern(pattern, protein);
1023 let inner = result.unwrap_err().to_string();
1024 assert_eq!(inner, msg);
1025 }
1026
1027 #[test]
1028 fn test_validate_fixed_pattern_rejects_non_ascii() {
1029 let pattern = "AC\u{0141}T";
1032 let result = validate_fixed_pattern(pattern, false);
1033 assert!(result.is_err());
1034 assert!(
1035 result
1036 .unwrap_err()
1037 .to_string()
1038 .contains("Fixed pattern must contain only DNA bases")
1039 );
1040 }
1041
1042 #[test]
1047 fn test_bitmask_matcher_dna_only_has_no_iupac() {
1048 let opts = MatcherOpts {
1049 invert_match: false,
1050 reverse_complement: false,
1051 };
1052 let bitenc = encode(b"ACGT");
1053 let matcher = BitMaskMatcher::new(bitenc, opts);
1054 assert!(
1055 !matcher.has_iupac,
1056 "DNA-only pattern should have has_iupac=false"
1057 );
1058 }
1059
1060 #[test]
1061 fn test_bitmask_matcher_iupac_pattern_has_iupac() {
1062 let opts = MatcherOpts {
1063 invert_match: false,
1064 reverse_complement: false,
1065 };
1066 let bitenc = encode(b"ACRT");
1067 let matcher = BitMaskMatcher::new(bitenc, opts);
1068 assert!(
1069 matcher.has_iupac,
1070 "IUPAC pattern should have has_iupac=true"
1071 );
1072 }
1073
1074 #[test]
1079 fn test_bitmask_matcher_read_shorter_than_pattern() {
1080 let opts = MatcherOpts {
1081 invert_match: false,
1082 reverse_complement: false,
1083 };
1084 let bitenc = encode(b"ACGTACGT");
1085 let matcher = BitMaskMatcher::new(bitenc, opts);
1086 let seq = "ACG";
1087 let qual = (0..seq.len()).map(|_| "X").collect::<String>();
1088 let record = format!("@id\n{seq}\n+\n{qual}\n");
1089 let mut reader = seq_io::fastq::Reader::new(record.as_bytes());
1090 let read_record = reader.next().unwrap().unwrap();
1091 assert!(!matcher.read_match(&read_record));
1092 }
1093
1094 #[test]
1095 fn test_bitmask_set_matcher_read_shorter_than_pattern() {
1096 let opts = MatcherOpts {
1097 invert_match: false,
1098 reverse_complement: false,
1099 };
1100 let bitencs = vec![encode(b"ACGTACGT")];
1101 let matcher = BitMaskSetMatcher::new(bitencs, opts);
1102 let seq = "ACG";
1103 let qual = (0..seq.len()).map(|_| "X").collect::<String>();
1104 let record = format!("@id\n{seq}\n+\n{qual}\n");
1105 let mut reader = seq_io::fastq::Reader::new(record.as_bytes());
1106 let read_record = reader.next().unwrap().unwrap();
1107 assert!(!matcher.read_match(&read_record));
1108 }
1109
1110 #[rstest]
1115 #[case(false, "AG", "AGG", true)]
1116 #[case(false, "CC", "AGG", false)]
1117 #[case(true, "CC", "AGG", true)]
1118 #[case(true, "TT", "AGG", false)]
1119 #[case(false, "AT", "ATGAT", true)]
1120 #[case(true, "CG", "GCCG", true)]
1121 #[case(false, "AGAG", "AGAGAGAG", true)]
1122 #[case(true, "TCTC", "AGAGAGAG", true)]
1123 fn test_bitmask_matcher_dna_read_match(
1124 #[case] reverse_complement: bool,
1125 #[case] pattern: &str,
1126 #[case] seq: &str,
1127 #[case] expected: bool,
1128 ) {
1129 let invert_matches = [true, false];
1130 for invert_match in IntoIterator::into_iter(invert_matches) {
1131 let opts = MatcherOpts {
1132 invert_match,
1133 reverse_complement,
1134 };
1135 let matcher = BitMaskMatcher::new(encode(pattern.as_bytes()), opts);
1136 let qual = (0..seq.len()).map(|_| "X").collect::<String>();
1137 let record = format!("@id\n{seq}\n+\n{qual}\n");
1138 let mut reader = seq_io::fastq::Reader::new(record.as_bytes());
1139 let read_record = reader.next().unwrap().unwrap();
1140 let result = matcher.read_match(&read_record);
1141 if invert_match {
1142 assert_ne!(result, expected);
1143 } else {
1144 assert_eq!(result, expected);
1145 }
1146 }
1147 }
1148
1149 #[rstest]
1154 #[case(false, "GATK", "GATG", true)]
1155 #[case(false, "GATK", "GATT", true)]
1156 #[case(false, "GATK", "GATA", false)]
1157 #[case(false, "GATK", "GATC", false)]
1158 #[case(false, "GATR", "GATGA", true)]
1159 #[case(false, "GATR", "GATA", true)]
1160 #[case(false, "GATR", "GATGT", true)]
1161 #[case(false, "N", "A", true)]
1162 #[case(false, "N", "C", true)]
1163 #[case(false, "N", "G", true)]
1164 #[case(false, "N", "T", true)]
1165 #[case(false, "ANA", "ACA", true)]
1166 #[case(false, "ANA", "AAA", true)]
1167 #[case(true, "GATK", "CATC", true)]
1168 fn test_bitmask_matcher_iupac_read_match(
1169 #[case] reverse_complement: bool,
1170 #[case] pattern: &str,
1171 #[case] seq: &str,
1172 #[case] expected: bool,
1173 ) {
1174 let opts = MatcherOpts {
1175 invert_match: false,
1176 reverse_complement,
1177 };
1178 let matcher = BitMaskMatcher::new(encode(pattern.as_bytes()), opts);
1179 let qual = (0..seq.len()).map(|_| "X").collect::<String>();
1180 let record = format!("@id\n{seq}\n+\n{qual}\n");
1181 let mut reader = seq_io::fastq::Reader::new(record.as_bytes());
1182 let read_record = reader.next().unwrap().unwrap();
1183 assert_eq!(
1184 matcher.read_match(&read_record),
1185 expected,
1186 "pattern={} seq={} revcomp={}",
1187 pattern,
1188 seq,
1189 reverse_complement
1190 );
1191 }
1192
1193 #[rstest]
1198 #[case(false, vec!["A", "AGG", "G"], "AGGG", true)]
1199 #[case(true, vec!["A", "AGG", "G"], "TCCC", true)]
1200 #[case(false, vec!["A", "AGG", "G"], "TTTT", false)]
1201 #[case(true, vec!["T", "AAA"], "CCCCC", false)]
1202 #[case(false, vec!["AGG", "C", "TT"], "AGGTT", true)]
1203 #[case(true, vec!["AGG", "C", "TT"], "GGGGG", true)]
1204 #[case(false, vec!["AC", "TT"], "TTACGTT", true)]
1205 #[case(true, vec!["GT", "AA"], "TTACGTT", true)]
1206 #[case(false, vec!["GAGA","AGTT"], "GAGAGTT", true)]
1207 #[case(true, vec!["CTCT","AACT"], "GAGAGTT", true)]
1208 fn test_bitmask_set_matcher_dna_read_match(
1209 #[case] reverse_complement: bool,
1210 #[case] patterns: Vec<&str>,
1211 #[case] seq: &str,
1212 #[case] expected: bool,
1213 ) {
1214 let invert_matches = [true, false];
1215 for invert_match in IntoIterator::into_iter(invert_matches) {
1216 let opts = MatcherOpts {
1217 invert_match,
1218 reverse_complement,
1219 };
1220 let bitencs: Vec<_> = patterns.iter().map(|p| encode(p.as_bytes())).collect();
1221 let matcher = BitMaskSetMatcher::new(bitencs, opts);
1222 let qual = (0..seq.len()).map(|_| "X").collect::<String>();
1223 let record = format!("@id\n{seq}\n+\n{qual}\n");
1224 let mut reader = seq_io::fastq::Reader::new(record.as_bytes());
1225 let read_record = reader.next().unwrap().unwrap();
1226 let result = matcher.read_match(&read_record);
1227 if invert_match {
1228 assert_ne!(result, expected);
1229 } else {
1230 assert_eq!(result, expected);
1231 }
1232 }
1233 }
1234
1235 #[rstest]
1240 #[case(false, vec!["GATK", "ANA"], "GATG", true)]
1241 #[case(false, vec!["GATK", "ANA"], "ACA", true)]
1242 #[case(false, vec!["GATK", "ANA"], "CCCC", false)]
1243 #[case(false, vec!["R", "Y"], "A", true)]
1244 #[case(false, vec!["R", "Y"], "C", true)]
1245 #[case(false, vec!["R", "Y"], "AAAA", true)]
1246 fn test_bitmask_set_matcher_iupac_read_match(
1247 #[case] reverse_complement: bool,
1248 #[case] patterns: Vec<&str>,
1249 #[case] seq: &str,
1250 #[case] expected: bool,
1251 ) {
1252 let opts = MatcherOpts {
1253 invert_match: false,
1254 reverse_complement,
1255 };
1256 let bitencs: Vec<_> = patterns.iter().map(|p| encode(p.as_bytes())).collect();
1257 let matcher = BitMaskSetMatcher::new(bitencs, opts);
1258 let qual = (0..seq.len()).map(|_| "X").collect::<String>();
1259 let record = format!("@id\n{seq}\n+\n{qual}\n");
1260 let mut reader = seq_io::fastq::Reader::new(record.as_bytes());
1261 let read_record = reader.next().unwrap().unwrap();
1262 assert_eq!(
1263 matcher.read_match(&read_record),
1264 expected,
1265 "patterns={:?} seq={} revcomp={}",
1266 patterns,
1267 seq,
1268 reverse_complement
1269 );
1270 }
1271
1272 fn read_match_seq(matcher: &dyn Matcher, seq: &str) -> bool {
1278 let qual = "X".repeat(seq.len());
1279 let record = format!("@id\n{seq}\n+\n{qual}\n");
1280 let mut reader = seq_io::fastq::Reader::new(record.as_bytes());
1281 let rec = reader.next().unwrap().unwrap();
1282 matcher.read_match(&rec)
1283 }
1284
1285 #[test]
1286 fn test_rc_match_simple() {
1287 let opts_fwd = MatcherOpts {
1289 invert_match: false,
1290 reverse_complement: false,
1291 };
1292 let opts_rc = MatcherOpts {
1293 invert_match: false,
1294 reverse_complement: true,
1295 };
1296 let matcher_fwd = FixedStringMatcher::new("AAA", opts_fwd);
1297 let matcher_rc = FixedStringMatcher::new("AAA", opts_rc);
1298
1299 assert!(read_match_seq(&matcher_fwd, "AAATTTCCC"));
1300 assert!(read_match_seq(&matcher_rc, "AAATTTCCC"));
1301 }
1302
1303 #[test]
1304 fn test_rc_match_only_in_rc() {
1305 let opts_fwd = MatcherOpts {
1307 invert_match: false,
1308 reverse_complement: false,
1309 };
1310 let opts_rc = MatcherOpts {
1311 invert_match: false,
1312 reverse_complement: true,
1313 };
1314 let matcher_fwd = FixedStringMatcher::new("TTT", opts_fwd);
1315 let matcher_rc = FixedStringMatcher::new("TTT", opts_rc);
1316
1317 assert!(!read_match_seq(&matcher_fwd, "CCCGGGAAA"));
1318 assert!(read_match_seq(&matcher_rc, "CCCGGGAAA"));
1319 }
1320
1321 #[test]
1322 fn test_rc_match_at_end() {
1323 let opts_fwd = MatcherOpts {
1325 invert_match: false,
1326 reverse_complement: false,
1327 };
1328 let opts_rc = MatcherOpts {
1329 invert_match: false,
1330 reverse_complement: true,
1331 };
1332 let matcher_fwd = FixedStringMatcher::new("AAA", opts_fwd);
1333 let matcher_rc = FixedStringMatcher::new("AAA", opts_rc);
1334
1335 assert!(!read_match_seq(&matcher_fwd, "CCCGGGTTT"));
1336 assert!(read_match_seq(&matcher_rc, "CCCGGGTTT"));
1337 }
1338
1339 #[test]
1340 fn test_rc_match_palindrome() {
1341 let opts_fwd = MatcherOpts {
1343 invert_match: false,
1344 reverse_complement: false,
1345 };
1346 let opts_rc = MatcherOpts {
1347 invert_match: false,
1348 reverse_complement: true,
1349 };
1350 let matcher_fwd = FixedStringMatcher::new("ACGT", opts_fwd);
1351 let matcher_rc = FixedStringMatcher::new("ACGT", opts_rc);
1352
1353 assert!(read_match_seq(&matcher_fwd, "ACGTACGTACGT"));
1354 assert!(read_match_seq(&matcher_rc, "ACGTACGTACGT"));
1355 }
1356
1357 #[test]
1358 fn test_rc_match_single_base() {
1359 let opts_fwd = MatcherOpts {
1361 invert_match: false,
1362 reverse_complement: false,
1363 };
1364 let opts_rc = MatcherOpts {
1365 invert_match: false,
1366 reverse_complement: true,
1367 };
1368 let matcher_fwd = FixedStringMatcher::new("T", opts_fwd);
1369 let matcher_rc = FixedStringMatcher::new("T", opts_rc);
1370
1371 assert!(!read_match_seq(&matcher_fwd, "AAAA"));
1372 assert!(read_match_seq(&matcher_rc, "AAAA"));
1373 }
1374
1375 use proptest::prelude::*;
1380
1381 fn dna_sequence() -> impl Strategy<Value = Vec<u8>> {
1383 prop::collection::vec(prop::sample::select(b"ACGT".to_vec()), 1..100)
1384 }
1385
1386 fn dna_pattern() -> impl Strategy<Value = Vec<u8>> {
1388 prop::collection::vec(prop::sample::select(b"ACGT".to_vec()), 1..20)
1389 }
1390
1391 proptest! {
1392 #![proptest_config(ProptestConfig::with_cases(20))]
1393
1394 #[test]
1396 fn prop_reverse_complement_involutive(seq in dna_sequence()) {
1397 let rc = reverse_complement(&seq);
1398 let rc_rc = reverse_complement(&rc);
1399 prop_assert_eq!(seq, rc_rc);
1400 }
1401
1402 #[test]
1404 fn prop_sequence_matches_itself(seq in dna_sequence()) {
1405 let opts = MatcherOpts {
1406 invert_match: false,
1407 reverse_complement: false,
1408 };
1409 let matcher = FixedStringMatcher::new(
1410 std::str::from_utf8(&seq).unwrap(),
1411 opts
1412 );
1413 prop_assert!(matcher.bases_match(&seq));
1414 }
1415
1416 #[test]
1418 fn prop_rc_matching_consistent(seq in dna_sequence(), pattern in dna_pattern()) {
1419 let opts_forward = MatcherOpts {
1420 invert_match: false,
1421 reverse_complement: false,
1422 };
1423 let opts_rc = MatcherOpts {
1424 invert_match: false,
1425 reverse_complement: true,
1426 };
1427
1428 let pattern_str = std::str::from_utf8(&pattern).unwrap();
1429 let seq_str = std::str::from_utf8(&seq).unwrap();
1430 let matcher_forward = FixedStringMatcher::new(pattern_str, opts_forward);
1431 let matcher_rc = FixedStringMatcher::new(pattern_str, opts_rc);
1432
1433 let matches_forward = read_match_seq(&matcher_forward, seq_str);
1434 let matches_with_rc = read_match_seq(&matcher_rc, seq_str);
1435
1436 prop_assert!(
1439 !matches_forward || matches_with_rc,
1440 "forward matched but RC-enabled did not"
1441 );
1442 }
1443
1444 #[test]
1446 fn prop_invert_match_negates(seq in dna_sequence(), pattern in dna_pattern()) {
1447 let opts_normal = MatcherOpts {
1448 invert_match: false,
1449 reverse_complement: false,
1450 };
1451 let opts_inverted = MatcherOpts {
1452 invert_match: true,
1453 reverse_complement: false,
1454 };
1455
1456 let pattern_str = std::str::from_utf8(&pattern).unwrap();
1457 let matcher_normal = FixedStringMatcher::new(pattern_str, opts_normal);
1458 let matcher_inverted = FixedStringMatcher::new(pattern_str, opts_inverted);
1459
1460 let matches_normal = matcher_normal.bases_match(&seq);
1461 let matches_inverted = matcher_inverted.bases_match(&seq);
1462
1463 prop_assert_ne!(matches_normal, matches_inverted);
1464 }
1465
1466 #[test]
1468 fn prop_fixed_and_regex_agree(seq in dna_sequence(), pattern in dna_pattern()) {
1469 let opts = MatcherOpts {
1470 invert_match: false,
1471 reverse_complement: false,
1472 };
1473
1474 let pattern_str = std::str::from_utf8(&pattern).unwrap();
1475 let fixed_matcher = FixedStringMatcher::new(pattern_str, opts);
1476
1477 if let Ok(regex_matcher) = RegexMatcher::new(pattern_str, opts) {
1478 let fixed_result = fixed_matcher.bases_match(&seq);
1479 let regex_result = regex_matcher.bases_match(&seq);
1480 prop_assert_eq!(fixed_result, regex_result,
1481 "Fixed and regex matchers disagree on pattern '{}' and sequence '{}'",
1482 pattern_str, std::str::from_utf8(&seq).unwrap()
1483 );
1484 }
1485 }
1486 }
1487}