#![allow(clippy::needless_range_loop)]
pub fn suffix_array(s: &[u32], alphabet_size: usize) -> Vec<u32> {
let n = s.len();
let mut sa = vec![0u32; n];
if n <= 1 {
return sa;
}
let s_usize: Vec<usize> = s.iter().map(|&x| x as usize).collect();
let mut sa_usize = vec![0usize; n];
sais(&s_usize, &mut sa_usize, alphabet_size);
for (dst, &v) in sa.iter_mut().zip(sa_usize.iter()) {
*dst = v as u32;
}
sa
}
#[inline]
fn is_lms(t: &[bool], i: usize) -> bool {
i > 0 && t[i] && !t[i - 1]
}
fn lms_substrings_equal(s: &[usize], t: &[bool], a: usize, b: usize) -> bool {
let n = s.len();
if a == n - 1 || b == n - 1 {
return a == b;
}
let mut i = 0;
loop {
let ai = a + i;
let bi = b + i;
if ai >= n || bi >= n {
return false;
}
let a_lms = is_lms(t, ai);
let b_lms = is_lms(t, bi);
if i > 0 && a_lms && b_lms {
return true; }
if a_lms != b_lms {
return false; }
if s[ai] != s[bi] || t[ai] != t[bi] {
return false;
}
i += 1;
}
}
fn buckets(sizes: &[usize], tail: bool) -> Vec<usize> {
let mut b = vec![0usize; sizes.len()];
let mut sum = 0usize;
for (i, &sz) in sizes.iter().enumerate() {
sum += sz;
b[i] = if tail { sum } else { sum - sz };
}
b
}
fn bucket_sizes(s: &[usize], k: usize) -> Vec<usize> {
let mut sizes = vec![0usize; k];
for &c in s {
sizes[c] += 1;
}
sizes
}
fn induce(s: &[usize], sa: &mut [usize], t: &[bool], sizes: &[usize], k: usize) {
let n = s.len();
let none = usize::MAX;
let mut head = buckets(sizes, false);
for i in 0..n {
let j = sa[i];
if j != none && j > 0 && !t[j - 1] {
let c = s[j - 1];
sa[head[c]] = j - 1;
head[c] += 1;
}
}
let _ = k;
let mut tail = buckets(sizes, true);
for i in (0..n).rev() {
let j = sa[i];
if j != none && j > 0 && t[j - 1] {
let c = s[j - 1];
tail[c] -= 1;
sa[tail[c]] = j - 1;
}
}
}
fn sais(s: &[usize], sa: &mut [usize], k: usize) {
let n = s.len();
let none = usize::MAX;
if n == 1 {
sa[0] = 0;
return;
}
let mut t = vec![false; n];
t[n - 1] = true;
for i in (0..n - 1).rev() {
t[i] = s[i] < s[i + 1] || (s[i] == s[i + 1] && t[i + 1]);
}
let sizes = bucket_sizes(s, k);
for v in sa.iter_mut() {
*v = none;
}
let mut tail = buckets(&sizes, true);
for i in 1..n {
if is_lms(&t, i) {
let c = s[i];
tail[c] -= 1;
sa[tail[c]] = i;
}
}
induce(s, sa, &t, &sizes, k);
let mut lms_sorted: Vec<usize> = Vec::new();
for i in 0..n {
let j = sa[i];
if j != none && is_lms(&t, j) {
lms_sorted.push(j);
}
}
let mut names = vec![none; n];
let mut name = 0usize;
names[lms_sorted[0]] = 0;
let mut prev = lms_sorted[0];
for &cur in lms_sorted.iter().skip(1) {
if !lms_substrings_equal(s, &t, prev, cur) {
name += 1;
}
names[cur] = name;
prev = cur;
}
let num_names = name + 1;
let mut lms_positions: Vec<usize> = (1..n).filter(|&i| is_lms(&t, i)).collect();
lms_positions.sort_unstable();
let reduced: Vec<usize> = lms_positions.iter().map(|&p| names[p]).collect();
let mut reduced_sa = vec![0usize; reduced.len()];
if num_names == reduced.len() {
for (i, &nm) in reduced.iter().enumerate() {
reduced_sa[nm] = i;
}
} else {
sais(&reduced, &mut reduced_sa, num_names);
}
for v in sa.iter_mut() {
*v = none;
}
let mut tail = buckets(&sizes, true);
for idx in (0..reduced_sa.len()).rev() {
let p = lms_positions[reduced_sa[idx]];
let c = s[p];
tail[c] -= 1;
sa[tail[c]] = p;
}
induce(s, sa, &t, &sizes, k);
}
pub fn lcp_kasai(s: &[u32], sa: &[u32]) -> Vec<u32> {
let n = s.len();
let mut lcp = vec![0u32; n];
if n == 0 {
return lcp;
}
let mut rank = vec![0usize; n];
for (i, &p) in sa.iter().enumerate() {
rank[p as usize] = i;
}
let mut h = 0usize;
for i in 0..n {
if rank[i] > 0 {
let j = sa[rank[i] - 1] as usize;
while i + h < n && j + h < n && s[i + h] == s[j + h] {
h += 1;
}
lcp[rank[i]] = h as u32;
h = h.saturating_sub(1);
} else {
h = 0;
}
}
lcp
}
#[cfg(test)]
mod tests {
use super::*;
fn naive_sa(s: &[u32]) -> Vec<u32> {
let n = s.len();
let mut idx: Vec<u32> = (0..n as u32).collect();
idx.sort_by(|&a, &b| s[a as usize..].cmp(&s[b as usize..]));
idx
}
fn naive_lcp(s: &[u32], sa: &[u32]) -> Vec<u32> {
let mut lcp = vec![0u32; sa.len()];
for i in 1..sa.len() {
let a = &s[sa[i - 1] as usize..];
let b = &s[sa[i] as usize..];
let mut k = 0;
while k < a.len() && k < b.len() && a[k] == b[k] {
k += 1;
}
lcp[i] = k as u32;
}
lcp
}
fn with_sentinel(body: &[u32]) -> (Vec<u32>, usize) {
let mut s: Vec<u32> = body.iter().map(|&x| x + 1).collect();
s.push(0);
let k = s.iter().copied().max().unwrap() as usize + 1;
(s, k)
}
#[test]
fn matches_naive_on_small_strings() {
let cases: &[&[u32]] = &[
&[],
&[0],
&[1, 1, 1, 1],
&[3, 1, 2, 1, 2, 3],
&[1, 2, 1, 2, 1, 2, 1],
&[5, 4, 3, 2, 1],
&[1, 2, 3, 4, 5],
];
for c in cases {
let (s, k) = with_sentinel(c);
let sa = suffix_array(&s, k);
assert_eq!(sa, naive_sa(&s), "SA mismatch on {c:?}");
assert_eq!(
lcp_kasai(&s, &sa),
naive_lcp(&s, &sa),
"LCP mismatch on {c:?}"
);
}
}
#[test]
fn matches_naive_on_random_strings() {
let mut state: u64 = 0x9E3779B97F4A7C15;
let mut next = || {
state = state
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
(state >> 33) as u32
};
for _ in 0..400 {
let len = (next() % 60) as usize + 1;
let alpha = (next() % 4) + 1; let body: Vec<u32> = (0..len).map(|_| next() % alpha).collect();
let (s, k) = with_sentinel(&body);
let sa = suffix_array(&s, k);
assert_eq!(sa, naive_sa(&s), "SA mismatch on {body:?}");
assert_eq!(
lcp_kasai(&s, &sa),
naive_lcp(&s, &sa),
"LCP mismatch on {body:?}"
);
}
}
#[test]
fn finds_repeat_via_lcp() {
let (s, k) = with_sentinel(&[1, 2, 3, 1, 2, 3]);
let sa = suffix_array(&s, k);
let lcp = lcp_kasai(&s, &sa);
assert!(
lcp.iter().any(|&l| l >= 3),
"expected an LCP ≥ 3, got {lcp:?}"
);
}
}