1#![allow(clippy::many_single_char_names)]
2
3fn sa_naive<T: Ord>(s: &[T]) -> Vec<usize> {
4 let n = s.len();
5 let mut sa: Vec<usize> = (0..n).collect();
6 sa.sort_by(|&(mut l), &(mut r)| {
7 if l == r {
8 return std::cmp::Ordering::Equal;
9 }
10 while l < n && r < n {
11 if s[l] != s[r] {
12 return s[l].cmp(&s[r]);
13 }
14 l += 1;
15 r += 1;
16 }
17 if l == n {
18 std::cmp::Ordering::Less
19 } else {
20 std::cmp::Ordering::Greater
21 }
22 });
23 sa
24}
25
26fn sa_doubling(s: &[i32]) -> Vec<usize> {
27 let n = s.len();
28 let mut sa: Vec<usize> = (0..n).collect();
29 let mut rnk: Vec<i32> = s.to_vec();
30 let mut tmp = vec![0; n];
31 let mut k = 1;
32 while k < n {
33 let cmp = |&x: &usize, &y: &usize| {
34 if rnk[x] != rnk[y] {
35 return rnk[x].cmp(&rnk[y]);
36 }
37 let rx = if x + k < n { rnk[x + k] } else { -1 };
38 let ry = if y + k < n { rnk[y + k] } else { -1 };
39 rx.cmp(&ry)
40 };
41 sa.sort_by(cmp);
42 tmp[sa[0]] = 0;
43 for i in 1..n {
44 tmp[sa[i]] =
45 tmp[sa[i - 1]] + i32::from(cmp(&sa[i - 1], &sa[i]) == std::cmp::Ordering::Less);
46 }
47 std::mem::swap(&mut tmp, &mut rnk);
48 k *= 2;
49 }
50 sa
51}
52
53trait Threshold {
54 fn threshold_naive() -> usize;
55 fn threshold_doubling() -> usize;
56}
57
58enum DefaultThreshold {}
59impl Threshold for DefaultThreshold {
60 fn threshold_naive() -> usize {
61 10
62 }
63 fn threshold_doubling() -> usize {
64 40
65 }
66}
67
68#[allow(clippy::cognitive_complexity)]
69fn sa_is<T: Threshold>(s: &[usize], upper: usize) -> Vec<usize> {
70 let n = s.len();
71 match n {
72 0 => return vec![],
73 1 => return vec![0],
74 2 => return if s[0] < s[1] { vec![0, 1] } else { vec![1, 0] },
75 _ => (),
76 }
77 if n < T::threshold_naive() {
78 return sa_naive(s);
79 }
80 if n < T::threshold_doubling() {
81 let s: Vec<i32> = s.iter().map(|&x| x as i32).collect();
82 return sa_doubling(&s);
83 }
84 let mut sa = vec![0; n];
85 let mut ls = vec![false; n];
86 for i in (0..n - 1).rev() {
87 ls[i] = if s[i] == s[i + 1] {
88 ls[i + 1]
89 } else {
90 s[i] < s[i + 1]
91 };
92 }
93 let mut sum_l = vec![0; upper + 1];
94 let mut sum_s = vec![0; upper + 1];
95 for i in 0..n {
96 if !ls[i] {
97 sum_s[s[i]] += 1;
98 } else {
99 sum_l[s[i] + 1] += 1;
100 }
101 }
102 for i in 0..=upper {
103 sum_s[i] += sum_l[i];
104 if i < upper {
105 sum_l[i + 1] += sum_s[i];
106 }
107 }
108
109 let induce = |sa: &mut [usize], lms: &[usize]| {
111 for elem in sa.iter_mut() {
112 *elem = 0;
113 }
114 let mut buf = sum_s.clone();
115 for &d in lms {
116 if d == n {
117 continue;
118 }
119 let old = buf[s[d]];
120 buf[s[d]] += 1;
121 sa[old] = d + 1;
122 }
123 buf.copy_from_slice(&sum_l);
124 let old = buf[s[n - 1]];
125 buf[s[n - 1]] += 1;
126 sa[old] = n;
127 for i in 0..n {
128 let v = sa[i];
129 if v >= 2 && !ls[v - 2] {
130 let old = buf[s[v - 2]];
131 buf[s[v - 2]] += 1;
132 sa[old] = v - 1;
133 }
134 }
135 buf.copy_from_slice(&sum_l);
136 for i in (0..n).rev() {
137 let v = sa[i];
138 if v >= 2 && ls[v - 2] {
139 buf[s[v - 2] + 1] -= 1;
140 sa[buf[s[v - 2] + 1]] = v - 1;
141 }
142 }
143 };
144 let mut lms_map = vec![0; n + 1];
146 let mut m = 0;
147 for i in 1..n {
148 if !ls[i - 1] && ls[i] {
149 lms_map[i] = m + 1;
150 m += 1;
151 }
152 }
153 let mut lms = Vec::with_capacity(m);
154 for i in 1..n {
155 if !ls[i - 1] && ls[i] {
156 lms.push(i);
157 }
158 }
159 assert_eq!(lms.len(), m);
160 induce(&mut sa, &lms);
161
162 if m > 0 {
163 let mut sorted_lms = Vec::with_capacity(m);
164 for &v in &sa {
165 if lms_map[v - 1] != 0 {
166 sorted_lms.push(v - 1);
167 }
168 }
169 let mut rec_s = vec![0; m];
170 let mut rec_upper = 0;
171 rec_s[lms_map[sorted_lms[0]] - 1] = 0;
172 for i in 1..m {
173 let mut l = sorted_lms[i - 1];
174 let mut r = sorted_lms[i];
175 let end_l = if lms_map[l] < m { lms[lms_map[l]] } else { n };
176 let end_r = if lms_map[r] < m { lms[lms_map[r]] } else { n };
177 let same = if end_l - l != end_r - r {
178 false
179 } else {
180 while l < end_l {
181 if s[l] != s[r] {
182 break;
183 }
184 l += 1;
185 r += 1;
186 }
187 l != n && s[l] == s[r]
188 };
189 if !same {
190 rec_upper += 1;
191 }
192 rec_s[lms_map[sorted_lms[i]] - 1] = rec_upper;
193 }
194
195 let rec_sa = sa_is::<T>(&rec_s, rec_upper);
196 for i in 0..m {
197 sorted_lms[i] = lms[rec_sa[i]];
198 }
199 induce(&mut sa, &mut sorted_lms);
200 }
201 for elem in sa.iter_mut() {
202 *elem -= 1;
203 }
204 sa
205}
206
207fn sa_is_i32<T: Threshold>(s: &[i32], upper: i32) -> Vec<usize> {
208 let s: Vec<usize> = s.iter().map(|&x| x as usize).collect();
209 sa_is::<T>(&s, upper as usize)
210}
211
212pub fn suffix_array_manual(s: &[i32], upper: i32) -> Vec<usize> {
213 assert!(upper >= 0);
214 for &elem in s {
215 assert!(0 <= elem && elem <= upper);
216 }
217 sa_is_i32::<DefaultThreshold>(s, upper)
218}
219
220pub fn suffix_array_arbitrary<T: Ord>(s: &[T]) -> Vec<usize> {
221 let n = s.len();
222 let mut idx: Vec<usize> = (0..n).collect();
223 idx.sort_by_key(|&i| &s[i]);
224 let mut s2 = vec![0; n];
225 let mut now = 0;
226 for i in 0..n {
227 if i > 0 && s[idx[i - 1]] != s[idx[i]] {
228 now += 1;
229 }
230 s2[idx[i]] = now;
231 }
232 sa_is_i32::<DefaultThreshold>(&s2, now)
233}
234
235pub fn suffix_array(s: &str) -> Vec<usize> {
236 let s2: Vec<usize> = s.bytes().map(|x| x as usize).collect();
237 sa_is::<DefaultThreshold>(&s2, 255)
238}
239
240pub fn lcp_array_arbitrary<T: Ord>(s: &[T], sa: &[usize]) -> Vec<usize> {
245 assert!(s.len() == sa.len());
246 let n = s.len();
247 assert!(n >= 1);
248 let mut rnk = vec![0; n];
249 for i in 0..n {
250 assert!(sa[i] < n);
251 rnk[sa[i]] = i;
252 }
253 let mut lcp = vec![0; n - 1];
254 let mut h: usize = 0;
255 for i in 0..n - 1 {
256 h = h.saturating_sub(1);
257 if rnk[i] == 0 {
258 continue;
259 }
260 let j = sa[rnk[i] - 1];
261 while j + h < n && i + h < n {
262 if s[j + h] != s[i + h] {
263 break;
264 }
265 h += 1;
266 }
267 lcp[rnk[i] - 1] = h;
268 }
269 lcp
270}
271
272pub fn lcp_array(s: &str, sa: &[usize]) -> Vec<usize> {
273 let s: &[u8] = s.as_bytes();
274 lcp_array_arbitrary(s, sa)
275}
276
277pub fn z_algorithm_arbitrary<T: Ord>(s: &[T]) -> Vec<usize> {
282 let n = s.len();
283 if n == 0 {
284 return vec![];
285 }
286 let mut z = vec![0; n];
287 z[0] = 0;
288 let mut j = 0;
289 for i in 1..n {
290 let mut k = if j + z[j] <= i {
291 0
292 } else {
293 std::cmp::min(j + z[j] - i, z[i - j])
294 };
295 while i + k < n && s[k] == s[i + k] {
296 k += 1;
297 }
298 z[i] = k;
299 if j + z[j] < i + z[i] {
300 j = i;
301 }
302 }
303 z[0] = n;
304 z
305}
306
307pub fn z_algorithm(s: &str) -> Vec<usize> {
308 let s: &[u8] = s.as_bytes();
309 z_algorithm_arbitrary(s)
310}
311
312#[cfg(test)]
313mod tests {
314 use super::*;
315
316 enum ZeroThreshold {}
317 impl Threshold for ZeroThreshold {
318 fn threshold_naive() -> usize {
319 0
320 }
321 fn threshold_doubling() -> usize {
322 0
323 }
324 }
325
326 fn verify_all(str: &str, expected_array: &[usize]) {
327 let array: Vec<i32> = str.bytes().map(|x| x as i32).collect();
328 let sa = sa_doubling(&array);
329 assert_eq!(sa, expected_array);
330 let sa_naive = sa_naive(&array);
331 assert_eq!(sa_naive, expected_array);
332 let sa_is = sa_is_i32::<ZeroThreshold>(&array, 255);
333 assert_eq!(sa_is, expected_array);
334
335 let sa_str = suffix_array(str);
336 assert_eq!(sa_str, expected_array);
337 }
338
339 #[test]
340 fn test_sa_0() {
341 let array = vec![0, 1, 2, 3, 4];
342 let sa = sa_doubling(&array);
343 assert_eq!(sa, vec![0, 1, 2, 3, 4]);
344 }
345
346 #[test]
347 fn test_sa_1() {
348 let str = "abracadabra";
349 verify_all(str, &[10, 7, 0, 3, 5, 8, 1, 4, 6, 9, 2]);
350 }
351
352 #[test]
353 fn test_sa_2() {
354 let str = "mmiissiissiippii"; verify_all(str, &[15, 14, 10, 6, 2, 11, 7, 3, 1, 0, 13, 12, 9, 5, 8, 4]);
356 }
357
358 #[test]
359 fn test_lcp_0() {
360 let str = "abracadabra";
361 let sa = suffix_array(str);
362 let lcp = lcp_array(str, &sa);
363 assert_eq!(lcp, &[1, 4, 1, 1, 0, 3, 0, 0, 0, 2]);
364 }
365
366 #[test]
367 fn test_lcp_1() {
368 let str = "mmiissiissiippii"; let sa = suffix_array(str);
370 let lcp = lcp_array(str, &sa);
371 assert_eq!(lcp, &[1, 2, 2, 6, 1, 1, 5, 0, 1, 0, 1, 0, 3, 1, 4]);
372 }
373
374 #[test]
375 fn test_z_0() {
376 let str = "abracadabra";
377 let lcp = z_algorithm(str);
378 assert_eq!(lcp, &[11, 0, 0, 1, 0, 1, 0, 4, 0, 0, 1]);
379 }
380
381 #[test]
382 fn test_z_1() {
383 let str = "ababababa";
384 let lcp = z_algorithm(str);
385 assert_eq!(lcp, &[9, 0, 7, 0, 5, 0, 3, 0, 1]);
386 }
387}