kmp 0.1.1

Various functions using the Knuth–Morris–Pratt algorithm to efficiently find patterns.
Documentation
use crate::kmp_table;

/// Finds a `needle` in a `haystack` using the [`Knuth–Morris–Pratt algorithm`].
///
/// Returns `None` if the needle could not be found.
///
/// Note: This function generates a `longest suffix-prefix` table prior to searching.
/// If multiple haystacks are to be searched using the same needle, consider first manually generating
/// said table using [`kmp_table`] and then searching each haystack using [`kmp_find_with_lsp_table`].
///
/// # Examples
///
/// ```
/// use kmp::kmp_find;
///
/// assert_eq!(kmp_find(&['y'], &['h', 'a', 'y', 's', 't', 'a', 'c', 'k']), Some(2));
/// assert_eq!(kmp_find(&[1, 2, 1], &[1, 2, 1, 2, 1]), Some(0));
/// assert_eq!(kmp_find(&[true, false, true], &[true, false]), None);
///
/// // because the needle and haystack only rely on being `PartialEq`, they can have different types
/// // hence rustc can not infer the type of `needle` and we have to define it
/// let empty_needle: &[&str; 0] = &[];
/// assert_eq!(kmp_find(empty_needle, &["some", "words"]), Some(0));
/// ```
///
/// [`Knuth–Morris–Pratt algorithm`]: https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
/// [`kmp_table`]: ./fn.kmp_table.html
/// [`kmp_find_with_lsp_table`]: ./fn.kmp_find_with_lsp_table.html
pub fn kmp_find<N, H>(needle: &[N], haystack: &[H]) -> Option<usize>
where
    N: PartialEq,
    H: PartialEq<N>,
{
    kmp_find_with_lsp_table(needle, haystack, &kmp_table(needle))
}

/// Finds a `needle` in a `haystack` using the [`Knuth–Morris–Pratt algorithm`] with a given
/// `longest suffix-prefix` table generated by [`kmp_table`].
///
/// Returns `None` if the needle could not be found.
///
/// If you're only searching in a single haystack consider using `kmp_find` for convenience.
///
/// # Examples
///
/// ```
/// use kmp::{kmp_find_with_lsp_table, kmp_table};
///
/// let needle = &['a', 'a', 'b', 'a'];
/// let table = kmp_table(needle);
///
/// assert_eq!(table, vec![0, 1, 0, 1]);
///
/// assert_eq!(kmp_find_with_lsp_table(needle, &['a', 'a', 'b', 'a', 'c', 'd', 'a', 'd'], &table), Some(0));
/// assert_eq!(kmp_find_with_lsp_table(needle, &['d', 'c', 'a', 'b', 'a', 'a', 'b', 'a'], &table), Some(4));
/// ```
///
/// [`Knuth–Morris–Pratt algorithm`]: https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
/// [`kmp_table`]: ./fn.kmp_table.html
/// [`kmp_find`]: ./fn.kmp_find.html
#[allow(clippy::indexing_slicing)]
pub fn kmp_find_with_lsp_table<N, H>(needle: &[N], haystack: &[H], lsp: &[usize]) -> Option<usize>
where
    N: PartialEq,
    H: PartialEq<N>,
{
    if needle.is_empty() {
        return Some(0);
    }

    if needle.len() > haystack.len() {
        return None;
    }

    let mut needle_pos: usize = 0;

    #[allow(clippy::integer_arithmetic)]
    for (haystack_pos, haystack_char) in haystack.iter().enumerate() {
        while needle_pos > 0 && *haystack_char != needle[needle_pos] {
            // character mismatch, move backwards in the needle
            needle_pos = lsp[needle_pos - 1];
        }

        if *haystack_char == needle[needle_pos] {
            // char matches, move to next needle character
            needle_pos += 1;

            if needle_pos == needle.len() {
                // we found all needle characters in the haystack, return position in haystack
                return Some(haystack_pos - (needle_pos - 1));
            }
        }
    }

    None
}

#[cfg(test)]
mod tests {
    use proptest::prelude::*;

    use crate::kmp_find;

    #[test]
    fn basic() {
        assert_eq!(
            Some(6),
            kmp_find(
                &['a', 'a', 'a', 'b'],
                &['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'b']
            )
        )
    }

    #[test]
    fn empty_needle() {
        assert_eq!(Some(0), kmp_find(&[], &['a', 'b', 'c', 'd', 'e']));
    }

    #[test]
    fn empty_haystack() {
        // we have to help with type inference here
        let empty_haystack: &[char; 0] = &[];
        assert_eq!(None, kmp_find(&['a', 'b', 'c'], empty_haystack));
    }

    #[test]
    fn empty_both() {
        // we have to help with type inference here
        let empty_needle: &[char; 0] = &[];
        let empty_haystack: &[char; 0] = &[];
        assert_eq!(Some(0), kmp_find(empty_needle, empty_haystack));
    }

    #[test]
    fn needle_longer_haystack() {
        assert_eq!(None, kmp_find(&['a', 'b', 'c'], &['a', 'b']));
    }

    proptest! {
        #[ignore]
        #[test]
        fn fuzz_input(needle in prop::collection::vec(".*", 0..100), haystack in prop::collection::vec(".*", 0..100)) {
            kmp_find(&needle, &haystack);
        }
    }
}