pattern_matching/single_pattern/bndm.rs
1// Copyright 2020 Alexander Korn
2//
3// Licensed under the MIT license
4
5use crate::single_pattern::shift_and::shift_and_single_masks;
6
7/// An implementation of the Backward Nondeterministic DAWG Matching
8/// algorithm (BNDM).
9///
10/// Takes a `pattern`, and text `text`.
11///
12/// If the given text contains the given pattern, the algorithm returns all
13/// indexes of the first letters of the found occurrences.
14/// If the pattern could not be found in the text, an empty vector is returned.
15///
16/// # Runtime
17///
18/// The worst case runtime is `O(n * m)`, with `n` being the length of the text
19/// and `m` being the length of the pattern.
20///
21/// # When to Use It
22///
23/// The algorithm works best if the pattern length is not larger than the bit
24/// width of a register on your system.
25///
26/// # How It Works
27///
28/// It uses the same function to generate shift masks as the Shift-And algorithm,
29/// but reverses the pattern before passing it to the mentioned function. Also,
30/// it scans the pattern in the current search window from right to left and as
31/// soon as the pattern does not match the current window determines how far the
32/// next jump (or shift of the search window) needs to be.
33pub fn bndm(pattern: &[u8], text: &[u8]) -> Vec<usize> {
34 let mut pattern_rev = pattern.to_vec();
35 pattern_rev.reverse();
36
37 let (masks, _, accept) = shift_and_single_masks(&pattern_rev);
38
39 bndm_with_masks(text, &masks, accept, pattern.len())
40}
41
42/// An implementation of the Backward Nondeterminstic DAWG Matching
43/// algorithm (BNDM) using already prepared shift masks.
44fn bndm_with_masks(text: &[u8], masks: &[usize], accept: usize, m: usize) -> Vec<usize> {
45 let n = text.len();
46 let mut window: usize = m;
47 let mut active: usize;
48 let mut j: usize;
49 let mut lastsuffix: usize;
50
51 let mut matches: Vec<usize> = Vec::new();
52
53 while window <= n {
54 active = (1 << m) - 1;
55 j = 1;
56 lastsuffix = 0;
57
58 while active != 0 {
59 active &= masks[text[window - j] as usize];
60
61 if active & accept != 0 {
62 if j == m {
63 matches.push(window - m);
64 break;
65 } else {
66 lastsuffix = j;
67 }
68 }
69
70 j += 1;
71 active <<= 1;
72 }
73
74 window += m - lastsuffix;
75 }
76
77 matches
78}
79
80#[cfg(test)]
81mod tests {
82 use super::*;
83
84 #[test]
85 fn test_bndm() {
86 let text = "gccttaacattattacgccta".as_bytes();
87 let pattern = "tta".as_bytes();
88
89 let mut matches = bndm(pattern, text);
90 matches.sort_unstable();
91
92 let matches_correct = vec![3, 9, 12];
93
94 assert_eq!(matches, matches_correct);
95 }
96}