1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
use alloc::vec::Vec; use crate::{kmp_find_with_lsp_table, kmp_table}; /// Matches a `needle` in a `haystack` using the [`Knuth–Morris–Pratt algorithm`] returning /// a vector of positions in the `haystack`. /// /// For matches that overlap, only the indices corresponding to the first match are returned. /// /// Note: This function generates a `longest suffix-prefix` table prior to matching. /// If multiple haystacks are to be matched using the same needle, consider first manually generating /// said table using [`kmp_table`] and then matching each haystack using [`kmp_match_with_lsp_table`]. /// /// # Examples /// /// ``` /// use kmp::kmp_match; /// /// assert_eq!(kmp_match(&['y'], &['h', 'a', 'y', 's', 't', 'a', 'c', 'k']), vec![2]); /// assert_eq!(kmp_match(&[1, 2, 1], &[1, 2, 1, 2, 1]), vec![0]); /// assert_eq!(kmp_match(&[true, false, true], &[true, false]), vec![]); /// assert_eq!(kmp_match(&[Some(-1), Some(-2)], &[Some(-1), Some(-2), None, Some(-2), Some(-1), Some(-2)]), vec![0, 4]); /// /// // because the needle and haystack only rely on being `PartialEq`, they can have different types /// // hence rustc can not infer the type of `needle` and we have to define it /// let empty_needle: &[&str; 0] = &[]; /// assert_eq!(kmp_match(empty_needle, &["some", "words"]), vec![0, 1, 2]); /// ``` /// /// [`Knuth–Morris–Pratt algorithm`]: https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm /// [`kmp_table`]: ./fn.kmp_table.html /// [`kmp_match_with_lsp_table`]: ./fn.kmp_match_with_lsp_table.html pub fn kmp_match<N, H>(needle: &[N], haystack: &[H]) -> Vec<usize> where N: PartialEq, H: PartialEq<N>, { kmp_match_with_lsp_table(needle, haystack, &kmp_table(needle)) } /// Matches a `needle` in a `haystack` using the [`Knuth–Morris–Pratt algorithm`] with a given /// `longest suffix-prefix` table generated by [`kmp_table`] and returns a vector of positions in the `haystack`. /// /// For matches that overlap, only the indices corresponding to the first match are returned. /// /// If you're only matching in a single haystack consider using [`kmp_match`] for convenience. /// /// # Examples /// /// ``` /// use kmp::{kmp_match_with_lsp_table, kmp_table}; /// /// let needle = &['a', 'b']; /// let table = kmp_table(needle); /// /// assert_eq!(table, vec![0, 0]); /// /// assert_eq!(kmp_match_with_lsp_table(needle, &['a', 'a', 'b', 'a', 'c', 'd', 'a', 'b', 'd'], &table), vec![1, 6]); /// assert_eq!(kmp_match_with_lsp_table(needle, &['d', 'c', 'a', 'b', 'a', 'a', 'b', 'a'], &table), vec![2, 5]); /// ``` /// /// [`Knuth–Morris–Pratt algorithm`]: https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm /// [`kmp_table`]: ./fn.kmp_table.html /// [`kmp_match`]: ./fn.kmp_match.html pub fn kmp_match_with_lsp_table<N, H>(needle: &[N], haystack: &[H], lsp: &[usize]) -> Vec<usize> where N: PartialEq, H: PartialEq<N>, { if needle.is_empty() { // if the needle is empty, every "in between" characters is a match // note: this is here just to replicate std::str::match_indices() behaviour return (0..=haystack.len()).collect(); } let mut start = 0; let mut locations = Vec::new(); #[allow(clippy::indexing_slicing, clippy::integer_arithmetic)] while let Some(position) = kmp_find_with_lsp_table(needle, &haystack[start..], lsp) { let position = position + start; start = position + needle.len(); locations.push(position); if start > haystack.len() - (needle.len() - 1) { break; } } locations } #[cfg(test)] mod tests { use alloc::vec; use proptest::prelude::*; use crate::kmp_match; #[test] fn basic() { assert_eq!( vec![0, 6, 12], kmp_match( &['a', 'b', 'c'], &['a', 'b', 'c', 'X', 'X', 'X', 'a', 'b', 'c', 'Y', 'Y', 'Y', 'a', 'b', 'c'], ) ); } #[test] fn concatenated() { assert_eq!( vec![1, 4], kmp_match(&['a', 'b', 'c'], &['1', 'a', 'b', 'c', 'a', 'b', 'c', '2']) ); } #[test] fn combined() { assert_eq!( vec![1], kmp_match(&['a', 'b', 'a'], &['1', 'a', 'b', 'a', 'b', 'a', '2']) ); } #[test] fn empty_needle() { assert_eq!(vec![0, 1, 2], kmp_match(&[], &['a', 'b'])); } #[test] fn empty_haystack() { // we have to help with type inference here let empty_haystack: &[char; 0] = &[]; assert!(kmp_match(&['a', 'b', 'c'], empty_haystack).is_empty()); } #[test] fn empty_both() { // we have to help with type inference here let empty_needle: &[char; 0] = &[]; let empty_haystack: &[char; 0] = &[]; assert_eq!(vec![0], kmp_match(empty_needle, empty_haystack)); } #[test] fn needle_longer_haystack() { assert!(kmp_match(&['a', 'b', 'c'], &['a', 'b']).is_empty()); } proptest! { #[ignore] #[test] fn fuzz_input(needle in prop::collection::vec(".*", 0..100), haystack in prop::collection::vec(".*", 0..100)) { kmp_match(&needle, &haystack); } } }