1#![allow(clippy::needless_range_loop)]
17
18pub fn suffix_array(s: &[u32], alphabet_size: usize) -> Vec<u32> {
24 let n = s.len();
25 let mut sa = vec![0u32; n];
26 if n <= 1 {
27 return sa;
28 }
29 let s_usize: Vec<usize> = s.iter().map(|&x| x as usize).collect();
30 let mut sa_usize = vec![0usize; n];
31 sais(&s_usize, &mut sa_usize, alphabet_size);
32 for (dst, &v) in sa.iter_mut().zip(sa_usize.iter()) {
33 *dst = v as u32;
34 }
35 sa
36}
37
38#[inline]
39fn is_lms(t: &[bool], i: usize) -> bool {
40 i > 0 && t[i] && !t[i - 1]
41}
42
43fn lms_substrings_equal(s: &[usize], t: &[bool], a: usize, b: usize) -> bool {
47 let n = s.len();
48 if a == n - 1 || b == n - 1 {
49 return a == b;
50 }
51 let mut i = 0;
52 loop {
53 let ai = a + i;
54 let bi = b + i;
55 if ai >= n || bi >= n {
56 return false;
57 }
58 let a_lms = is_lms(t, ai);
59 let b_lms = is_lms(t, bi);
60 if i > 0 && a_lms && b_lms {
61 return true; }
63 if a_lms != b_lms {
64 return false; }
66 if s[ai] != s[bi] || t[ai] != t[bi] {
67 return false;
68 }
69 i += 1;
70 }
71}
72
73fn buckets(sizes: &[usize], tail: bool) -> Vec<usize> {
75 let mut b = vec![0usize; sizes.len()];
76 let mut sum = 0usize;
77 for (i, &sz) in sizes.iter().enumerate() {
78 sum += sz;
79 b[i] = if tail { sum } else { sum - sz };
80 }
81 b
82}
83
84fn bucket_sizes(s: &[usize], k: usize) -> Vec<usize> {
85 let mut sizes = vec![0usize; k];
86 for &c in s {
87 sizes[c] += 1;
88 }
89 sizes
90}
91
92fn induce(s: &[usize], sa: &mut [usize], t: &[bool], sizes: &[usize], k: usize) {
94 let n = s.len();
95 let none = usize::MAX;
96 let mut head = buckets(sizes, false);
98 for i in 0..n {
99 let j = sa[i];
100 if j != none && j > 0 && !t[j - 1] {
101 let c = s[j - 1];
102 sa[head[c]] = j - 1;
103 head[c] += 1;
104 }
105 }
106 let _ = k;
107 let mut tail = buckets(sizes, true);
109 for i in (0..n).rev() {
110 let j = sa[i];
111 if j != none && j > 0 && t[j - 1] {
112 let c = s[j - 1];
113 tail[c] -= 1;
114 sa[tail[c]] = j - 1;
115 }
116 }
117}
118
119fn sais(s: &[usize], sa: &mut [usize], k: usize) {
120 let n = s.len();
121 let none = usize::MAX;
122 if n == 1 {
123 sa[0] = 0;
124 return;
125 }
126 let mut t = vec![false; n];
128 t[n - 1] = true;
129 for i in (0..n - 1).rev() {
130 t[i] = s[i] < s[i + 1] || (s[i] == s[i + 1] && t[i + 1]);
131 }
132
133 let sizes = bucket_sizes(s, k);
134
135 for v in sa.iter_mut() {
137 *v = none;
138 }
139 let mut tail = buckets(&sizes, true);
140 for i in 1..n {
141 if is_lms(&t, i) {
142 let c = s[i];
143 tail[c] -= 1;
144 sa[tail[c]] = i;
145 }
146 }
147 induce(s, sa, &t, &sizes, k);
148
149 let mut lms_sorted: Vec<usize> = Vec::new();
151 for i in 0..n {
152 let j = sa[i];
153 if j != none && is_lms(&t, j) {
154 lms_sorted.push(j);
155 }
156 }
157 let mut names = vec![none; n];
158 let mut name = 0usize;
159 names[lms_sorted[0]] = 0;
160 let mut prev = lms_sorted[0];
161 for &cur in lms_sorted.iter().skip(1) {
162 if !lms_substrings_equal(s, &t, prev, cur) {
163 name += 1;
164 }
165 names[cur] = name;
166 prev = cur;
167 }
168 let num_names = name + 1;
169
170 let mut lms_positions: Vec<usize> = (1..n).filter(|&i| is_lms(&t, i)).collect();
172 lms_positions.sort_unstable();
173 let reduced: Vec<usize> = lms_positions.iter().map(|&p| names[p]).collect();
174
175 let mut reduced_sa = vec![0usize; reduced.len()];
177 if num_names == reduced.len() {
178 for (i, &nm) in reduced.iter().enumerate() {
180 reduced_sa[nm] = i;
181 }
182 } else {
183 sais(&reduced, &mut reduced_sa, num_names);
184 }
185
186 for v in sa.iter_mut() {
188 *v = none;
189 }
190 let mut tail = buckets(&sizes, true);
191 for idx in (0..reduced_sa.len()).rev() {
193 let p = lms_positions[reduced_sa[idx]];
194 let c = s[p];
195 tail[c] -= 1;
196 sa[tail[c]] = p;
197 }
198 induce(s, sa, &t, &sizes, k);
199}
200
201pub fn lcp_kasai(s: &[u32], sa: &[u32]) -> Vec<u32> {
204 let n = s.len();
205 let mut lcp = vec![0u32; n];
206 if n == 0 {
207 return lcp;
208 }
209 let mut rank = vec![0usize; n];
210 for (i, &p) in sa.iter().enumerate() {
211 rank[p as usize] = i;
212 }
213 let mut h = 0usize;
214 for i in 0..n {
215 if rank[i] > 0 {
216 let j = sa[rank[i] - 1] as usize;
217 while i + h < n && j + h < n && s[i + h] == s[j + h] {
218 h += 1;
219 }
220 lcp[rank[i]] = h as u32;
221 h = h.saturating_sub(1);
222 } else {
223 h = 0;
224 }
225 }
226 lcp
227}
228
229#[cfg(test)]
230mod tests {
231 use super::*;
232
233 fn naive_sa(s: &[u32]) -> Vec<u32> {
235 let n = s.len();
236 let mut idx: Vec<u32> = (0..n as u32).collect();
237 idx.sort_by(|&a, &b| s[a as usize..].cmp(&s[b as usize..]));
238 idx
239 }
240
241 fn naive_lcp(s: &[u32], sa: &[u32]) -> Vec<u32> {
242 let mut lcp = vec![0u32; sa.len()];
243 for i in 1..sa.len() {
244 let a = &s[sa[i - 1] as usize..];
245 let b = &s[sa[i] as usize..];
246 let mut k = 0;
247 while k < a.len() && k < b.len() && a[k] == b[k] {
248 k += 1;
249 }
250 lcp[i] = k as u32;
251 }
252 lcp
253 }
254
255 fn with_sentinel(body: &[u32]) -> (Vec<u32>, usize) {
257 let mut s: Vec<u32> = body.iter().map(|&x| x + 1).collect();
258 s.push(0);
259 let k = s.iter().copied().max().unwrap() as usize + 1;
260 (s, k)
261 }
262
263 #[test]
264 fn matches_naive_on_small_strings() {
265 let cases: &[&[u32]] = &[
266 &[],
267 &[0],
268 &[1, 1, 1, 1],
269 &[3, 1, 2, 1, 2, 3],
270 &[1, 2, 1, 2, 1, 2, 1],
271 &[5, 4, 3, 2, 1],
272 &[1, 2, 3, 4, 5],
273 ];
274 for c in cases {
275 let (s, k) = with_sentinel(c);
276 let sa = suffix_array(&s, k);
277 assert_eq!(sa, naive_sa(&s), "SA mismatch on {c:?}");
278 assert_eq!(
279 lcp_kasai(&s, &sa),
280 naive_lcp(&s, &sa),
281 "LCP mismatch on {c:?}"
282 );
283 }
284 }
285
286 #[test]
287 fn matches_naive_on_random_strings() {
288 let mut state: u64 = 0x9E3779B97F4A7C15;
290 let mut next = || {
291 state = state
292 .wrapping_mul(6364136223846793005)
293 .wrapping_add(1442695040888963407);
294 (state >> 33) as u32
295 };
296 for _ in 0..400 {
297 let len = (next() % 60) as usize + 1;
298 let alpha = (next() % 4) + 1; let body: Vec<u32> = (0..len).map(|_| next() % alpha).collect();
300 let (s, k) = with_sentinel(&body);
301 let sa = suffix_array(&s, k);
302 assert_eq!(sa, naive_sa(&s), "SA mismatch on {body:?}");
303 assert_eq!(
304 lcp_kasai(&s, &sa),
305 naive_lcp(&s, &sa),
306 "LCP mismatch on {body:?}"
307 );
308 }
309 }
310
311 #[test]
312 fn finds_repeat_via_lcp() {
313 let (s, k) = with_sentinel(&[1, 2, 3, 1, 2, 3]);
315 let sa = suffix_array(&s, k);
316 let lcp = lcp_kasai(&s, &sa);
317 assert!(
318 lcp.iter().any(|&l| l >= 3),
319 "expected an LCP ≥ 3, got {lcp:?}"
320 );
321 }
322}