pattern_matching/single_pattern/
horspool.rs

1fn horspool_shift(pattern: &[u8]) -> Vec<usize> {
2    let mut shift = vec![pattern.len(); 256];
3    let m = pattern.len();
4
5    // Iterate over 0..m - 1
6    for (j, c) in pattern.iter().enumerate().take(m - 1) {
7        shift[*c as usize] = m - 1 - j;
8    }
9
10    shift
11}
12
13/// An implementation of the Horspool algorithm.
14///
15/// Takes a `pattern`, a text `text` and a value `i0` specifying at which index
16/// of the text it should start to search for the next occurence.
17///
18/// If the given text contains the given pattern, the algorithm returns the
19/// index `i` of the first letter of the first occurrence with `i >= i0`.
20/// If the pattern could not be found in the text, `None` is returned.
21///
22/// This algorithm terminates after finding one occurrence of the given pattern
23/// in the given text. If you want to find all occurrences, consider using
24/// [`horspool_all`](horspool_all) instead.
25pub fn horspool(pattern: &[u8], text: &[u8], i0: usize) -> Option<usize> {
26    let m = pattern.len();
27    let n = text.len();
28
29    let shift = horspool_shift(pattern);
30    let mut last = i0 + m - 1;
31    let p_last = pattern[m - 1];
32
33    loop {
34        while last < n && text[last] != p_last {
35            last += shift[text[last] as usize];
36        }
37
38        if last >= n {
39            break;
40        }
41
42        if text[last - (m - 1)..last] == pattern[0..m - 1] {
43            return Some(last - m + 1);
44        }
45
46        last += shift[p_last as usize];
47    }
48
49    None
50}
51
52/// A simple algorithm for matching a single pattern on a text returning all occurrences.
53///
54/// Takes a `pattern`, and text `text`.
55///
56/// If the given text contains the given pattern, the algorithm returns the
57/// indexes of the first letters of all found occurrences.
58/// If the pattern could not be found in the text, an empty vector is returned.
59pub fn horspool_all(pattern: &[u8], text: &[u8]) -> Vec<usize> {
60    let mut res = Vec::new();
61    let mut i0 = 0;
62
63    while let Some(occ) = horspool(pattern, text, i0) {
64        res.push(occ);
65
66        i0 = occ + 1; // TODO or `+ m`?
67    }
68
69    res
70}
71
72#[cfg(test)]
73mod tests {
74    use super::*;
75
76    #[test]
77    fn test_horspool_all() {
78        let text = "gccttaacattattacgccta".as_bytes();
79        let pattern = "tta".as_bytes();
80
81        let mut matches = horspool_all(pattern, text);
82        matches.sort_unstable();
83
84        let matches_correct = vec![3, 9, 12];
85
86        assert_eq!(matches, matches_correct);
87    }
88}