oxihuman_core/
string_search.rs1#![allow(dead_code)]
4
5pub fn kmp_failure_func(pattern: &[u8]) -> Vec<usize> {
9 let m = pattern.len();
10 let mut fail = vec![0usize; m];
11 let mut k = 0usize;
12 for i in 1..m {
13 while k > 0 && pattern[k] != pattern[i] {
14 k = fail[k - 1];
15 }
16 if pattern[k] == pattern[i] {
17 k += 1;
18 }
19 fail[i] = k;
20 }
21 fail
22}
23
24pub fn kmp_find_all(text: &[u8], pattern: &[u8]) -> Vec<usize> {
26 if pattern.is_empty() {
27 return Vec::new();
28 }
29 let fail = kmp_failure_func(pattern);
30 let mut matches = Vec::new();
31 let mut k = 0usize;
32 for (i, &ch) in text.iter().enumerate() {
33 while k > 0 && pattern[k] != ch {
34 k = fail[k - 1];
35 }
36 if pattern[k] == ch {
37 k += 1;
38 }
39 if k == pattern.len() {
40 matches.push(i + 1 - pattern.len());
41 k = fail[k - 1];
42 }
43 }
44 matches
45}
46
47pub fn kmp_contains(text: &[u8], pattern: &[u8]) -> bool {
49 !kmp_find_all(text, pattern).is_empty()
50}
51
52const RK_BASE: u64 = 257;
53const RK_MOD: u64 = 1_000_000_007;
54
55pub fn rabin_karp_find_all(text: &[u8], pattern: &[u8]) -> Vec<usize> {
57 let m = pattern.len();
58 let n = text.len();
59 if m == 0 || m > n {
60 return Vec::new();
61 }
62
63 let mut pat_hash = 0u64;
65 let mut window_hash = 0u64;
66 let mut high_power = 1u64;
67 for i in 0..m {
68 pat_hash = (pat_hash * RK_BASE + pattern[i] as u64) % RK_MOD;
69 window_hash = (window_hash * RK_BASE + text[i] as u64) % RK_MOD;
70 if i < m - 1 {
71 high_power = (high_power * RK_BASE) % RK_MOD;
72 }
73 }
74
75 let mut matches = Vec::new();
76 let mut start = 0;
77 loop {
78 if window_hash == pat_hash && text[start..start + m] == *pattern {
79 matches.push(start);
80 }
81 if start + m >= n {
82 break;
83 }
84 window_hash = (window_hash + RK_MOD - text[start] as u64 * high_power % RK_MOD) % RK_MOD;
85 window_hash = (window_hash * RK_BASE + text[start + m] as u64) % RK_MOD;
86 start += 1;
87 }
88 matches
89}
90
91pub fn kmp_count(text: &[u8], pattern: &[u8]) -> usize {
93 kmp_find_all(text, pattern).len()
94}
95
96pub struct StringSearcher {
98 pattern: Vec<u8>,
99 fail: Vec<usize>,
100}
101
102pub fn new_string_searcher(pattern: &str) -> StringSearcher {
104 let pat = pattern.as_bytes().to_vec();
105 let fail = kmp_failure_func(&pat);
106 StringSearcher { pattern: pat, fail }
107}
108
109impl StringSearcher {
110 pub fn find_all(&self, text: &str) -> Vec<usize> {
112 let t = text.as_bytes();
113 let m = self.pattern.len();
114 if m == 0 {
115 return Vec::new();
116 }
117 let mut matches = Vec::new();
118 let mut k = 0usize;
119 for (i, &ch) in t.iter().enumerate() {
120 while k > 0 && self.pattern[k] != ch {
121 k = self.fail[k - 1];
122 }
123 if self.pattern[k] == ch {
124 k += 1;
125 }
126 if k == m {
127 matches.push(i + 1 - m);
128 k = self.fail[k - 1];
129 }
130 }
131 matches
132 }
133
134 pub fn count(&self, text: &str) -> usize {
136 self.find_all(text).len()
137 }
138
139 pub fn contains(&self, text: &str) -> bool {
141 !self.find_all(text).is_empty()
142 }
143}
144
145#[cfg(test)]
146mod tests {
147 use super::*;
148
149 #[test]
150 fn test_kmp_no_match() {
151 let r = kmp_find_all(b"hello world", b"xyz");
153 assert!(r.is_empty());
154 }
155
156 #[test]
157 fn test_kmp_single_match() {
158 let r = kmp_find_all(b"abcabc", b"abc");
160 assert_eq!(r[0], 0);
161 }
162
163 #[test]
164 fn test_kmp_multiple_matches() {
165 let r = kmp_find_all(b"aaa", b"aa");
167 assert_eq!(r.len(), 2);
168 }
169
170 #[test]
171 fn test_kmp_contains() {
172 assert!(kmp_contains(b"hello world", b"world"));
174 assert!(!kmp_contains(b"hello", b"xyz"));
175 }
176
177 #[test]
178 fn test_rabin_karp_finds_match() {
179 let r = rabin_karp_find_all(b"abcabc", b"bc");
181 assert!(r.contains(&1));
182 assert!(r.contains(&4));
183 }
184
185 #[test]
186 fn test_rabin_karp_no_match() {
187 let r = rabin_karp_find_all(b"hello", b"xyz");
189 assert!(r.is_empty());
190 }
191
192 #[test]
193 fn test_string_searcher_find_all() {
194 let s = new_string_searcher("ab");
196 let r = s.find_all("ababab");
197 assert_eq!(r.len(), 3);
198 }
199
200 #[test]
201 fn test_kmp_count() {
202 assert_eq!(kmp_count(b"banana", b"an"), 2);
204 }
205
206 #[test]
207 fn test_searcher_contains() {
208 let s = new_string_searcher("world");
210 assert!(s.contains("hello world"));
211 assert!(!s.contains("hello earth"));
212 }
213}