1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
use alloc::vec::Vec;

use crate::{kmp_find_with_lsp_table, kmp_table};

/// Matches a `needle` in a `haystack` using the [`Knuth–Morris–Pratt algorithm`] returning
/// a vector of positions in the `haystack`.
///
/// For matches that overlap, only the indices corresponding to the first match are returned.
///
/// Note: This function generates a `longest suffix-prefix` table prior to matching.
/// If multiple haystacks are to be matched using the same needle, consider first manually generating
/// said table using [`kmp_table`] and then matching each haystack using [`kmp_match_with_lsp_table`].
///
/// # Examples
///
/// ```
/// use kmp::kmp_match;
///
/// assert_eq!(kmp_match(&['y'], &['h', 'a', 'y', 's', 't', 'a', 'c', 'k']), vec![2]);
/// assert_eq!(kmp_match(&[1, 2, 1], &[1, 2, 1, 2, 1]), vec![0]);
/// assert_eq!(kmp_match(&[true, false, true], &[true, false]), vec![]);
/// assert_eq!(kmp_match(&[Some(-1), Some(-2)], &[Some(-1), Some(-2), None, Some(-2), Some(-1), Some(-2)]), vec![0, 4]);
///
/// // because the needle and haystack only rely on being `PartialEq`, they can have different types
/// // hence rustc can not infer the type of `needle` and we have to define it
/// let empty_needle: &[&str; 0] = &[];
/// assert_eq!(kmp_match(empty_needle, &["some", "words"]), vec![0, 1, 2]);
/// ```
///
/// [`Knuth–Morris–Pratt algorithm`]: https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
/// [`kmp_table`]: ./fn.kmp_table.html
/// [`kmp_match_with_lsp_table`]: ./fn.kmp_match_with_lsp_table.html
pub fn kmp_match<N, H>(needle: &[N], haystack: &[H]) -> Vec<usize>
where
    N: PartialEq,
    H: PartialEq<N>,
{
    kmp_match_with_lsp_table(needle, haystack, &kmp_table(needle))
}

/// Matches a `needle` in a `haystack` using the [`Knuth–Morris–Pratt algorithm`] with a given
/// `longest suffix-prefix` table generated by [`kmp_table`] and returns a vector of positions in the `haystack`.
///
/// For matches that overlap, only the indices corresponding to the first match are returned.
///
/// If you're only matching in a single haystack consider using [`kmp_match`] for convenience.
///
/// # Examples
///
/// ```
/// use kmp::{kmp_match_with_lsp_table, kmp_table};
///
/// let needle = &['a', 'b'];
/// let table = kmp_table(needle);
///
/// assert_eq!(table, vec![0, 0]);
///
/// assert_eq!(kmp_match_with_lsp_table(needle, &['a', 'a', 'b', 'a', 'c', 'd', 'a', 'b', 'd'], &table), vec![1, 6]);
/// assert_eq!(kmp_match_with_lsp_table(needle, &['d', 'c', 'a', 'b', 'a', 'a', 'b', 'a'], &table), vec![2, 5]);
/// ```
///
/// [`Knuth–Morris–Pratt algorithm`]: https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
/// [`kmp_table`]: ./fn.kmp_table.html
/// [`kmp_match`]: ./fn.kmp_match.html
pub fn kmp_match_with_lsp_table<N, H>(needle: &[N], haystack: &[H], lsp: &[usize]) -> Vec<usize>
where
    N: PartialEq,
    H: PartialEq<N>,
{
    if needle.is_empty() {
        // if the needle is empty, every "in between" characters is a match
        // note: this is here just to replicate std::str::match_indices() behaviour
        return (0..=haystack.len()).collect();
    }

    let mut start = 0;
    let mut locations = Vec::new();

    #[allow(clippy::indexing_slicing, clippy::integer_arithmetic)]
    while let Some(position) = kmp_find_with_lsp_table(needle, &haystack[start..], lsp) {
        let position = position + start;
        start = position + needle.len();
        locations.push(position);
        if start > haystack.len() - (needle.len() - 1) {
            break;
        }
    }

    locations
}

#[cfg(test)]
mod tests {
    use alloc::vec;

    use proptest::prelude::*;

    use crate::kmp_match;

    #[test]
    fn basic() {
        assert_eq!(
            vec![0, 6, 12],
            kmp_match(
                &['a', 'b', 'c'],
                &['a', 'b', 'c', 'X', 'X', 'X', 'a', 'b', 'c', 'Y', 'Y', 'Y', 'a', 'b', 'c'],
            )
        );
    }

    #[test]
    fn concatenated() {
        assert_eq!(
            vec![1, 4],
            kmp_match(&['a', 'b', 'c'], &['1', 'a', 'b', 'c', 'a', 'b', 'c', '2'])
        );
    }

    #[test]
    fn combined() {
        assert_eq!(
            vec![1],
            kmp_match(&['a', 'b', 'a'], &['1', 'a', 'b', 'a', 'b', 'a', '2'])
        );
    }

    #[test]
    fn empty_needle() {
        assert_eq!(vec![0, 1, 2], kmp_match(&[], &['a', 'b']));
    }

    #[test]
    fn empty_haystack() {
        // we have to help with type inference here
        let empty_haystack: &[char; 0] = &[];
        assert!(kmp_match(&['a', 'b', 'c'], empty_haystack).is_empty());
    }

    #[test]
    fn empty_both() {
        // we have to help with type inference here
        let empty_needle: &[char; 0] = &[];
        let empty_haystack: &[char; 0] = &[];
        assert_eq!(vec![0], kmp_match(empty_needle, empty_haystack));
    }

    #[test]
    fn needle_longer_haystack() {
        assert!(kmp_match(&['a', 'b', 'c'], &['a', 'b']).is_empty());
    }

    proptest! {
        #[ignore]
        #[test]
        fn fuzz_input(needle in prop::collection::vec(".*", 0..100), haystack in prop::collection::vec(".*", 0..100)) {
            kmp_match(&needle, &haystack);
        }
    }
}