pattern_matching/single_pattern/
bndm.rs

1// Copyright 2020 Alexander Korn
2//
3// Licensed under the MIT license
4
5use crate::single_pattern::shift_and::shift_and_single_masks;
6
7/// An implementation of the Backward Nondeterministic DAWG Matching
8/// algorithm (BNDM).
9///
10/// Takes a `pattern`, and text `text`.
11///
12/// If the given text contains the given pattern, the algorithm returns all
13/// indexes of the first letters of the found occurrences.
14/// If the pattern could not be found in the text, an empty vector is returned.
15///
16/// # Runtime
17///
18/// The worst case runtime is `O(n * m)`, with `n` being the length of the text
19/// and `m` being the length of the pattern.
20///
21/// # When to Use It
22///
23/// The algorithm works best if the pattern length is not larger than the bit
24/// width of a register on your system.
25///
26/// # How It Works
27///
28/// It uses the same function to generate shift masks as the Shift-And algorithm,
29/// but reverses the pattern before passing it to the mentioned function. Also,
30/// it scans the pattern in the current search window from right to left and as
31/// soon as the pattern does not match the current window determines how far the
32/// next jump (or shift of the search window) needs to be.
33pub fn bndm(pattern: &[u8], text: &[u8]) -> Vec<usize> {
34    let mut pattern_rev = pattern.to_vec();
35    pattern_rev.reverse();
36
37    let (masks, _, accept) = shift_and_single_masks(&pattern_rev);
38
39    bndm_with_masks(text, &masks, accept, pattern.len())
40}
41
42/// An implementation of the Backward Nondeterminstic DAWG Matching
43/// algorithm (BNDM) using already prepared shift masks.
44fn bndm_with_masks(text: &[u8], masks: &[usize], accept: usize, m: usize) -> Vec<usize> {
45    let n = text.len();
46    let mut window: usize = m;
47    let mut active: usize;
48    let mut j: usize;
49    let mut lastsuffix: usize;
50
51    let mut matches: Vec<usize> = Vec::new();
52
53    while window <= n {
54        active = (1 << m) - 1;
55        j = 1;
56        lastsuffix = 0;
57
58        while active != 0 {
59            active &= masks[text[window - j] as usize];
60
61            if active & accept != 0 {
62                if j == m {
63                    matches.push(window - m);
64                    break;
65                } else {
66                    lastsuffix = j;
67                }
68            }
69
70            j += 1;
71            active <<= 1;
72        }
73
74        window += m - lastsuffix;
75    }
76
77    matches
78}
79
80#[cfg(test)]
81mod tests {
82    use super::*;
83
84    #[test]
85    fn test_bndm() {
86        let text = "gccttaacattattacgccta".as_bytes();
87        let pattern = "tta".as_bytes();
88
89        let mut matches = bndm(pattern, text);
90        matches.sort_unstable();
91
92        let matches_correct = vec![3, 9, 12];
93
94        assert_eq!(matches, matches_correct);
95    }
96}