Skip to main content

oxihuman_core/
string_search.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3#![allow(dead_code)]
4
5//! KMP pattern search and Rabin-Karp rolling hash.
6
7/// Build the KMP failure function for `pattern`.
8pub fn kmp_failure_func(pattern: &[u8]) -> Vec<usize> {
9    let m = pattern.len();
10    let mut fail = vec![0usize; m];
11    let mut k = 0usize;
12    for i in 1..m {
13        while k > 0 && pattern[k] != pattern[i] {
14            k = fail[k - 1];
15        }
16        if pattern[k] == pattern[i] {
17            k += 1;
18        }
19        fail[i] = k;
20    }
21    fail
22}
23
24/// Find all occurrences of `pattern` in `text` using KMP.
25pub fn kmp_find_all(text: &[u8], pattern: &[u8]) -> Vec<usize> {
26    if pattern.is_empty() {
27        return Vec::new();
28    }
29    let fail = kmp_failure_func(pattern);
30    let mut matches = Vec::new();
31    let mut k = 0usize;
32    for (i, &ch) in text.iter().enumerate() {
33        while k > 0 && pattern[k] != ch {
34            k = fail[k - 1];
35        }
36        if pattern[k] == ch {
37            k += 1;
38        }
39        if k == pattern.len() {
40            matches.push(i + 1 - pattern.len());
41            k = fail[k - 1];
42        }
43    }
44    matches
45}
46
47/// Check if `pattern` is a substring of `text` using KMP.
48pub fn kmp_contains(text: &[u8], pattern: &[u8]) -> bool {
49    !kmp_find_all(text, pattern).is_empty()
50}
51
52const RK_BASE: u64 = 257;
53const RK_MOD: u64 = 1_000_000_007;
54
55/// Rabin-Karp search: find all occurrences of `pattern` in `text`.
56pub fn rabin_karp_find_all(text: &[u8], pattern: &[u8]) -> Vec<usize> {
57    let m = pattern.len();
58    let n = text.len();
59    if m == 0 || m > n {
60        return Vec::new();
61    }
62
63    /* compute pattern hash and highest power */
64    let mut pat_hash = 0u64;
65    let mut window_hash = 0u64;
66    let mut high_power = 1u64;
67    for i in 0..m {
68        pat_hash = (pat_hash * RK_BASE + pattern[i] as u64) % RK_MOD;
69        window_hash = (window_hash * RK_BASE + text[i] as u64) % RK_MOD;
70        if i < m - 1 {
71            high_power = (high_power * RK_BASE) % RK_MOD;
72        }
73    }
74
75    let mut matches = Vec::new();
76    let mut start = 0;
77    loop {
78        if window_hash == pat_hash && text[start..start + m] == *pattern {
79            matches.push(start);
80        }
81        if start + m >= n {
82            break;
83        }
84        window_hash = (window_hash + RK_MOD - text[start] as u64 * high_power % RK_MOD) % RK_MOD;
85        window_hash = (window_hash * RK_BASE + text[start + m] as u64) % RK_MOD;
86        start += 1;
87    }
88    matches
89}
90
91/// Count occurrences of `pattern` in `text` (KMP).
92pub fn kmp_count(text: &[u8], pattern: &[u8]) -> usize {
93    kmp_find_all(text, pattern).len()
94}
95
96/// String search wrapper using KMP.
97pub struct StringSearcher {
98    pattern: Vec<u8>,
99    fail: Vec<usize>,
100}
101
102/// Construct a new StringSearcher for `pattern`.
103pub fn new_string_searcher(pattern: &str) -> StringSearcher {
104    let pat = pattern.as_bytes().to_vec();
105    let fail = kmp_failure_func(&pat);
106    StringSearcher { pattern: pat, fail }
107}
108
109impl StringSearcher {
110    /// Find all occurrences in `text`.
111    pub fn find_all(&self, text: &str) -> Vec<usize> {
112        let t = text.as_bytes();
113        let m = self.pattern.len();
114        if m == 0 {
115            return Vec::new();
116        }
117        let mut matches = Vec::new();
118        let mut k = 0usize;
119        for (i, &ch) in t.iter().enumerate() {
120            while k > 0 && self.pattern[k] != ch {
121                k = self.fail[k - 1];
122            }
123            if self.pattern[k] == ch {
124                k += 1;
125            }
126            if k == m {
127                matches.push(i + 1 - m);
128                k = self.fail[k - 1];
129            }
130        }
131        matches
132    }
133
134    /// Count occurrences.
135    pub fn count(&self, text: &str) -> usize {
136        self.find_all(text).len()
137    }
138
139    /// Check if pattern exists in text.
140    pub fn contains(&self, text: &str) -> bool {
141        !self.find_all(text).is_empty()
142    }
143}
144
145#[cfg(test)]
146mod tests {
147    use super::*;
148
149    #[test]
150    fn test_kmp_no_match() {
151        /* KMP returns empty when pattern not found */
152        let r = kmp_find_all(b"hello world", b"xyz");
153        assert!(r.is_empty());
154    }
155
156    #[test]
157    fn test_kmp_single_match() {
158        /* KMP finds single occurrence */
159        let r = kmp_find_all(b"abcabc", b"abc");
160        assert_eq!(r[0], 0);
161    }
162
163    #[test]
164    fn test_kmp_multiple_matches() {
165        /* KMP finds overlapping matches */
166        let r = kmp_find_all(b"aaa", b"aa");
167        assert_eq!(r.len(), 2);
168    }
169
170    #[test]
171    fn test_kmp_contains() {
172        /* kmp_contains returns correct bool */
173        assert!(kmp_contains(b"hello world", b"world"));
174        assert!(!kmp_contains(b"hello", b"xyz"));
175    }
176
177    #[test]
178    fn test_rabin_karp_finds_match() {
179        /* Rabin-Karp finds pattern in text */
180        let r = rabin_karp_find_all(b"abcabc", b"bc");
181        assert!(r.contains(&1));
182        assert!(r.contains(&4));
183    }
184
185    #[test]
186    fn test_rabin_karp_no_match() {
187        /* Rabin-Karp returns empty when no match */
188        let r = rabin_karp_find_all(b"hello", b"xyz");
189        assert!(r.is_empty());
190    }
191
192    #[test]
193    fn test_string_searcher_find_all() {
194        /* StringSearcher.find_all works correctly */
195        let s = new_string_searcher("ab");
196        let r = s.find_all("ababab");
197        assert_eq!(r.len(), 3);
198    }
199
200    #[test]
201    fn test_kmp_count() {
202        /* kmp_count returns correct number of occurrences */
203        assert_eq!(kmp_count(b"banana", b"an"), 2);
204    }
205
206    #[test]
207    fn test_searcher_contains() {
208        /* StringSearcher.contains returns true when pattern exists */
209        let s = new_string_searcher("world");
210        assert!(s.contains("hello world"));
211        assert!(!s.contains("hello earth"));
212    }
213}