1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
use crate::kmp_table;

/// Finds a `needle` in a `haystack` using the [`Knuth–Morris–Pratt algorithm`].
///
/// Returns `None` if the needle could not be found.
///
/// Note: This function generates a `longest suffix-prefix` table prior to searching.
/// If multiple haystacks are to be searched using the same needle, consider first manually generating
/// said table using [`kmp_table`] and then searching each haystack using [`kmp_find_with_lsp_table`].
///
/// # Examples
///
/// ```
/// use kmp::kmp_find;
///
/// assert_eq!(kmp_find(&['y'], &['h', 'a', 'y', 's', 't', 'a', 'c', 'k']), Some(2));
/// assert_eq!(kmp_find(&[1, 2, 1], &[1, 2, 1, 2, 1]), Some(0));
/// assert_eq!(kmp_find(&[true, false, true], &[true, false]), None);
///
/// // because the needle and haystack only rely on being `PartialEq`, they can have different types
/// // hence rustc can not infer the type of `needle` and we have to define it
/// let empty_needle: &[&str; 0] = &[];
/// assert_eq!(kmp_find(empty_needle, &["some", "words"]), Some(0));
/// ```
///
/// [`Knuth–Morris–Pratt algorithm`]: https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
/// [`kmp_table`]: ./fn.kmp_table.html
/// [`kmp_find_with_lsp_table`]: ./fn.kmp_find_with_lsp_table.html
pub fn kmp_find<N, H>(needle: &[N], haystack: &[H]) -> Option<usize>
where
    N: PartialEq,
    H: PartialEq<N>,
{
    kmp_find_with_lsp_table(needle, haystack, &kmp_table(needle))
}

/// Finds a `needle` in a `haystack` using the [`Knuth–Morris–Pratt algorithm`] with a given
/// `longest suffix-prefix` table generated by [`kmp_table`].
///
/// Returns `None` if the needle could not be found.
///
/// If you're only searching in a single haystack consider using `kmp_find` for convenience.
///
/// # Examples
///
/// ```
/// use kmp::{kmp_find_with_lsp_table, kmp_table};
///
/// let needle = &['a', 'a', 'b', 'a'];
/// let table = kmp_table(needle);
///
/// assert_eq!(table, vec![0, 1, 0, 1]);
///
/// assert_eq!(kmp_find_with_lsp_table(needle, &['a', 'a', 'b', 'a', 'c', 'd', 'a', 'd'], &table), Some(0));
/// assert_eq!(kmp_find_with_lsp_table(needle, &['d', 'c', 'a', 'b', 'a', 'a', 'b', 'a'], &table), Some(4));
/// ```
///
/// [`Knuth–Morris–Pratt algorithm`]: https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
/// [`kmp_table`]: ./fn.kmp_table.html
/// [`kmp_find`]: ./fn.kmp_find.html
#[allow(clippy::indexing_slicing)]
pub fn kmp_find_with_lsp_table<N, H>(needle: &[N], haystack: &[H], lsp: &[usize]) -> Option<usize>
where
    N: PartialEq,
    H: PartialEq<N>,
{
    if needle.is_empty() {
        return Some(0);
    }

    if needle.len() > haystack.len() {
        return None;
    }

    let mut needle_pos: usize = 0;

    #[allow(clippy::integer_arithmetic)]
    for (haystack_pos, haystack_char) in haystack.iter().enumerate() {
        while needle_pos > 0 && *haystack_char != needle[needle_pos] {
            // character mismatch, move backwards in the needle
            needle_pos = lsp[needle_pos - 1];
        }

        if *haystack_char == needle[needle_pos] {
            // char matches, move to next needle character
            needle_pos += 1;

            if needle_pos == needle.len() {
                // we found all needle characters in the haystack, return position in haystack
                return Some(haystack_pos - (needle_pos - 1));
            }
        }
    }

    None
}

#[cfg(test)]
mod tests {
    use proptest::prelude::*;

    use crate::kmp_find;

    #[test]
    fn basic() {
        assert_eq!(
            Some(6),
            kmp_find(
                &['a', 'a', 'a', 'b'],
                &['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'b']
            )
        )
    }

    #[test]
    fn empty_needle() {
        assert_eq!(Some(0), kmp_find(&[], &['a', 'b', 'c', 'd', 'e']));
    }

    #[test]
    fn empty_haystack() {
        // we have to help with type inference here
        let empty_haystack: &[char; 0] = &[];
        assert_eq!(None, kmp_find(&['a', 'b', 'c'], empty_haystack));
    }

    #[test]
    fn empty_both() {
        // we have to help with type inference here
        let empty_needle: &[char; 0] = &[];
        let empty_haystack: &[char; 0] = &[];
        assert_eq!(Some(0), kmp_find(empty_needle, empty_haystack));
    }

    #[test]
    fn needle_longer_haystack() {
        assert_eq!(None, kmp_find(&['a', 'b', 'c'], &['a', 'b']));
    }

    proptest! {
        #[ignore]
        #[test]
        fn fuzz_input(needle in prop::collection::vec(".*", 0..100), haystack in prop::collection::vec(".*", 0..100)) {
            kmp_find(&needle, &haystack);
        }
    }
}