pattern_matching/single_pattern/
shift_and.rs

1// Copyright 2020 Alexander Korn
2//
3// Licensed under the MIT license
4
5pub(crate) fn shift_and_single_masks(pattern: &[u8]) -> (Vec<usize>, usize, usize) {
6    let mut masks = vec![0; 256];
7    let mut bit = 1;
8
9    for c in pattern {
10        masks[*c as usize] |= bit;
11
12        bit *= 2;
13    }
14
15    (masks, 1, bit / 2)
16}
17
18fn shift_and_with_masks(
19    text: &[u8],
20    masks: &[usize],
21    ones: usize,
22    accept: usize,
23) -> Vec<(usize, usize)> {
24    let mut res = Vec::new();
25    let mut active: usize = 0;
26
27    for (i, c) in text.iter().enumerate() {
28        active = ((active << 1) | ones) & masks[*c as usize];
29
30        let found = active & accept;
31        if found != 0 {
32            res.push((i, found));
33        }
34    }
35
36    res
37}
38
39/// An implementation of the Shift-And algorithm.
40///
41/// Takes a `pattern`, and text `text`.
42///
43/// If the given text contains the given pattern, the algorithm returns the
44/// indexes of the first letters of all found occurrences.
45/// If the pattern could not be found in the text, an empty vector is returned.
46///
47/// # Runtime
48///
49/// The worst case runtime is `O(n * m)`, with `n` being the length of the text
50/// and `m` being the length of the pattern.
51///
52/// # When to Use It
53///
54/// The algorithm can be useful if you want to match short patterns.
55///
56/// # How It Works
57///
58/// The algorithm works by simulating a nondeterministic finite automaton (NFA)
59/// in memory using cheap bit operations.
60pub fn shift_and(pattern: &[u8], text: &[u8]) -> Vec<usize> {
61    let mut res = Vec::new();
62    let m = pattern.len();
63    let (mask, ones, accept) = shift_and_single_masks(pattern);
64
65    for (i, _) in shift_and_with_masks(text, &mask, ones, accept) {
66        res.push(i - m + 1);
67    }
68
69    res
70}
71
72#[cfg(test)]
73mod tests {
74    use super::*;
75
76    #[test]
77    fn test_shift_and() {
78        let text = "gccttaacattattacgccta".as_bytes();
79        let pattern = "tta".as_bytes();
80
81        let mut matches = shift_and(pattern, text);
82        matches.sort_unstable();
83
84        let matches_correct = vec![3, 9, 12];
85
86        assert_eq!(matches, matches_correct);
87    }
88}