kmp 0.1.1

Various functions using the Knuth–Morris–Pratt algorithm to efficiently find patterns.
Documentation
use alloc::vec::Vec;

use crate::{kmp_find_with_lsp_table, kmp_table};

/// Matches a `needle` in a `haystack` using the [`Knuth–Morris–Pratt algorithm`] returning
/// a vector of positions in the `haystack`.
///
/// For matches that overlap, only the indices corresponding to the first match are returned.
///
/// Note: This function generates a `longest suffix-prefix` table prior to matching.
/// If multiple haystacks are to be matched using the same needle, consider first manually generating
/// said table using [`kmp_table`] and then matching each haystack using [`kmp_match_with_lsp_table`].
///
/// # Examples
///
/// ```
/// use kmp::kmp_match;
///
/// assert_eq!(kmp_match(&['y'], &['h', 'a', 'y', 's', 't', 'a', 'c', 'k']), vec![2]);
/// assert_eq!(kmp_match(&[1, 2, 1], &[1, 2, 1, 2, 1]), vec![0]);
/// assert_eq!(kmp_match(&[true, false, true], &[true, false]), vec![]);
/// assert_eq!(kmp_match(&[Some(-1), Some(-2)], &[Some(-1), Some(-2), None, Some(-2), Some(-1), Some(-2)]), vec![0, 4]);
///
/// // because the needle and haystack only rely on being `PartialEq`, they can have different types
/// // hence rustc can not infer the type of `needle` and we have to define it
/// let empty_needle: &[&str; 0] = &[];
/// assert_eq!(kmp_match(empty_needle, &["some", "words"]), vec![0, 1, 2]);
/// ```
///
/// [`Knuth–Morris–Pratt algorithm`]: https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
/// [`kmp_table`]: ./fn.kmp_table.html
/// [`kmp_match_with_lsp_table`]: ./fn.kmp_match_with_lsp_table.html
pub fn kmp_match<N, H>(needle: &[N], haystack: &[H]) -> Vec<usize>
where
    N: PartialEq,
    H: PartialEq<N>,
{
    kmp_match_with_lsp_table(needle, haystack, &kmp_table(needle))
}

/// Matches a `needle` in a `haystack` using the [`Knuth–Morris–Pratt algorithm`] with a given
/// `longest suffix-prefix` table generated by [`kmp_table`] and returns a vector of positions in the `haystack`.
///
/// For matches that overlap, only the indices corresponding to the first match are returned.
///
/// If you're only matching in a single haystack consider using [`kmp_match`] for convenience.
///
/// # Examples
///
/// ```
/// use kmp::{kmp_match_with_lsp_table, kmp_table};
///
/// let needle = &['a', 'b'];
/// let table = kmp_table(needle);
///
/// assert_eq!(table, vec![0, 0]);
///
/// assert_eq!(kmp_match_with_lsp_table(needle, &['a', 'a', 'b', 'a', 'c', 'd', 'a', 'b', 'd'], &table), vec![1, 6]);
/// assert_eq!(kmp_match_with_lsp_table(needle, &['d', 'c', 'a', 'b', 'a', 'a', 'b', 'a'], &table), vec![2, 5]);
/// ```
///
/// [`Knuth–Morris–Pratt algorithm`]: https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
/// [`kmp_table`]: ./fn.kmp_table.html
/// [`kmp_match`]: ./fn.kmp_match.html
pub fn kmp_match_with_lsp_table<N, H>(needle: &[N], haystack: &[H], lsp: &[usize]) -> Vec<usize>
where
    N: PartialEq,
    H: PartialEq<N>,
{
    if needle.is_empty() {
        // if the needle is empty, every "in between" characters is a match
        // note: this is here just to replicate std::str::match_indices() behaviour
        return (0..=haystack.len()).collect();
    }

    let mut start = 0;
    let mut locations = Vec::new();

    #[allow(clippy::indexing_slicing, clippy::integer_arithmetic)]
    while let Some(position) = kmp_find_with_lsp_table(needle, &haystack[start..], lsp) {
        let position = position + start;
        start = position + needle.len();
        locations.push(position);
        if start > haystack.len() - (needle.len() - 1) {
            break;
        }
    }

    locations
}

#[cfg(test)]
mod tests {
    use alloc::vec;

    use proptest::prelude::*;

    use crate::kmp_match;

    #[test]
    fn basic() {
        assert_eq!(
            vec![0, 6, 12],
            kmp_match(
                &['a', 'b', 'c'],
                &['a', 'b', 'c', 'X', 'X', 'X', 'a', 'b', 'c', 'Y', 'Y', 'Y', 'a', 'b', 'c'],
            )
        );
    }

    #[test]
    fn concatenated() {
        assert_eq!(
            vec![1, 4],
            kmp_match(&['a', 'b', 'c'], &['1', 'a', 'b', 'c', 'a', 'b', 'c', '2'])
        );
    }

    #[test]
    fn combined() {
        assert_eq!(
            vec![1],
            kmp_match(&['a', 'b', 'a'], &['1', 'a', 'b', 'a', 'b', 'a', '2'])
        );
    }

    #[test]
    fn empty_needle() {
        assert_eq!(vec![0, 1, 2], kmp_match(&[], &['a', 'b']));
    }

    #[test]
    fn empty_haystack() {
        // we have to help with type inference here
        let empty_haystack: &[char; 0] = &[];
        assert!(kmp_match(&['a', 'b', 'c'], empty_haystack).is_empty());
    }

    #[test]
    fn empty_both() {
        // we have to help with type inference here
        let empty_needle: &[char; 0] = &[];
        let empty_haystack: &[char; 0] = &[];
        assert_eq!(vec![0], kmp_match(empty_needle, empty_haystack));
    }

    #[test]
    fn needle_longer_haystack() {
        assert!(kmp_match(&['a', 'b', 'c'], &['a', 'b']).is_empty());
    }

    proptest! {
        #[ignore]
        #[test]
        fn fuzz_input(needle in prop::collection::vec(".*", 0..100), haystack in prop::collection::vec(".*", 0..100)) {
            kmp_match(&needle, &haystack);
        }
    }
}