texcraft_stdext/algorithms/substringsearch.rs
1//! Knuth–Morris–Pratt substring search algorithm
2//!
3//! This module contains an optimal in time algorithm for finding a substring
4//! in a string. By 'string' and 'substring' we mean a vector of elements of the same type.
5//! The algorithm is due to
6//! [Knuth, Morris and Pratt](https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm).
7//!
8//! The API for this module is based on two assumptions:
9//!
10//! - There may be multiple searches for the same substring in different strings.
11//!
12//! - The elements of the string may be generated on demand as the search progresses.
13//! That is, the full string is not necessarily known at the start.
14//!
15//! ## Example
16//!
17//! To use the algorithm, a [Matcher] is first created.
18//! This factory takes ownership of the substring.
19//! On initialization the matcher computes a number of internal
20//! quantities which make the subsequent matching fast.
21//! These quantities depend on the substring, so mutating the substring
22//! after it has been passed to the matcher is statically prevented.
23//!
24//! To match a string, a new [Search] instance is created by calling [Matcher::start]. Elements
25//! of the string are passed in one at a time to the `next` method
26//! of the search.
27//! If the substring has length `m` and matches the last `m` elements that
28//! have been passed in, the `next` method returns `true`.
29//! Otherwise it returns `false`.
30//! A single search may be used to find multiple (possibly overlapping) instances
31//! of the substring in the same string.
32//!
33//! ```
34//! # use texcraft_stdext::algorithms::substringsearch::Matcher;
35//! # use texcraft_stdext::collections::nevec::Nevec;
36//! # use texcraft_stdext::nevec;
37//!
38//! let substring = nevec![2, 3, 2];
39//! let matcher = Matcher::new(substring);
40//! let mut search = matcher.start();
41//! assert_eq![search.next(&1), false];
42//! assert_eq![search.next(&2), false];
43//! assert_eq![search.next(&3), false];
44//! assert_eq![search.next(&2), true];
45//! assert_eq![search.next(&3), false];
46//! assert_eq![search.next(&2), true];
47//! ```
48//!
49use crate::collections::nevec::Nevec;
50
51/// Data structure used to match a specific substring in many strings.
52#[derive(Debug, Clone)]
53#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
54pub struct Matcher<T: PartialEq> {
55 substring: Nevec<T>,
56 prefix_fn: Nevec<usize>,
57}
58
59impl<T: PartialEq> Matcher<T> {
60 /// Create a new matcher that searches for the provide substring.
61 pub fn new(substring: Nevec<T>) -> Matcher<T> {
62 let mut prefix_fn = Nevec::with_capacity(0, substring.len());
63 let mut k = 0;
64 for i in 1..substring.len() {
65 while k > 0 && substring[k] != substring[i] {
66 k = prefix_fn[k - 1];
67 }
68 if substring[k] == substring[i] {
69 k += 1;
70 }
71 prefix_fn.push(k);
72 }
73
74 Matcher {
75 substring,
76 prefix_fn,
77 }
78 }
79
80 /// Start a new substring search.
81 pub fn start(&self) -> Search<T> {
82 Search {
83 factory: self,
84 q: 0,
85 }
86 }
87
88 /// Get an immutable reference to the underlying substring.
89 //
90 // Obtaining a mutable reference is not supported as internal details of
91 // the matcher factory rely on the substring remaining constant.
92 pub fn substring(&self) -> &Nevec<T> {
93 &self.substring
94 }
95
96 /// Retake ownership of the underlying substring.
97 pub fn take_substring(self) -> Nevec<T> {
98 self.substring
99 }
100}
101
102/// Data structure used to search for specific substring within a specific string.
103pub struct Search<'a, T: PartialEq> {
104 factory: &'a Matcher<T>,
105 q: usize,
106}
107
108impl<'a, T: PartialEq> Search<'a, T> {
109 /// Provide the next element of the string to the matcher.
110 /// This returns true if the last `m` elements of the string match the substring, where
111 /// `m` is the length of the substring.
112 pub fn next(&mut self, tail: &T) -> bool {
113 while self.q > 0 && &self.factory.substring[self.q] != tail {
114 self.q = self.factory.prefix_fn[self.q - 1];
115 }
116 if &self.factory.substring[self.q] == tail {
117 self.q += 1;
118 }
119 if self.q == self.factory.substring.len() {
120 self.q = self.factory.prefix_fn[self.q - 1];
121 return true;
122 }
123 false
124 }
125}