1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
use alloc::{vec, vec::Vec};

/// Builds the `longest suffix-prefix` (LSP) table for `needle`.
///
/// Entry `i` of the returned table is the length of the longest proper prefix of
/// `needle[..=i]` that is also a suffix of it. During a search this tells the matcher
/// which needle position to fall back to after a mismatch.
///
/// The table depends only on the needle, so it can be built once and reused when
/// several haystacks are searched with the same needle. Functions using this table
/// all end with `_with_lsp_table`.
///
/// An empty needle produces an empty table; otherwise the table has exactly
/// `needle.len()` entries and its first entry is always `0`.
#[allow(clippy::indexing_slicing)]
pub fn kmp_table<N>(needle: &[N]) -> Vec<usize>
where
    N: PartialEq,
{
    // Peel off the first element: its entry is always 0 and only the remaining
    // elements need to be scanned.
    let rest = match needle.split_first() {
        Some((_, rest)) => rest,
        None => return vec![],
    };

    let mut table = Vec::with_capacity(needle.len());
    table.push(0);

    // Border length of the prefix handled so far; always equal to the last table entry.
    let mut border: usize = 0;

    for current in rest {
        // On a mismatch, shrink to the next shorter border until the element fits
        // or no border is left.
        #[allow(clippy::integer_arithmetic)]
        while border > 0 && *current != needle[border] {
            border = table[border - 1];
        }

        // The element extends the current border by one.
        if *current == needle[border] {
            border += 1;
        }

        table.push(border);
    }

    table
}

#[cfg(test)]
mod tests {
    use alloc::vec;

    use proptest::prelude::*;

    use crate::kmp_table;

    /// An empty needle yields an empty table.
    #[test]
    fn generation() {
        let needle: [char; 0] = [];
        let table = kmp_table(&needle);
        assert!(table.is_empty());
    }

    /// A needle made of one repeated element gets a strictly increasing table.
    #[test]
    fn repeating() {
        let needle = ['a'; 5];
        let expected = vec![0, 1, 2, 3, 4];
        assert_eq!(expected, kmp_table(&needle));
    }

    /// The table works for any `PartialEq` element type, here `bool`.
    #[test]
    fn boolean() {
        let needle = [true, false, true, true, false, false, true];
        let expected = vec![0, 0, 1, 1, 2, 0, 1];
        assert_eq!(expected, kmp_table(&needle));
    }

    /// The last element mismatches after a border of 3 and falls back to a border of 2.
    #[test]
    fn multiple_chars() {
        let needle = ['a', 'b', 'a', 'c', 'a', 'b', 'a', 'b'];
        let expected = vec![0, 0, 1, 0, 1, 2, 3, 2];
        assert_eq!(expected, kmp_table(&needle));
    }

    /// Runs of one element interrupted by a second one, exercising repeated fallbacks.
    #[test]
    fn two_chars_with_repetitions() {
        let needle = ['a', 'a', 'a', 'b', 'a', 'a', 'a', 'a', 'a', 'b'];
        let expected = vec![0, 1, 2, 0, 1, 2, 3, 3, 3, 4];
        assert_eq!(expected, kmp_table(&needle));
    }

    proptest! {
        /// Table generation must not panic for arbitrary needles of strings.
        #[ignore]
        #[test]
        fn fuzz_input(characters in prop::collection::vec(".*", 0..100)) {
            let _table = kmp_table(characters.as_slice());
        }
    }
}