#![allow(non_snake_case)]
use buffertk::Unpackable;
use crate::Error;
use crate::builder::Builder;
use crate::sigma::Sigma;
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum LSType {
L,
S,
}
fn get_types(S: &[u32]) -> Vec<LSType> {
let mut prev = (LSType::S, 0);
let mut types = Vec::with_capacity(S.len());
for &s in S.iter().rev() {
prev = if s < prev.1 || (s == prev.1 && prev.0 == LSType::S) {
(LSType::S, s)
} else {
(LSType::L, s)
};
types.push(prev.0);
}
types.reverse();
types
}
fn is_lms(T: &[LSType], i: usize) -> bool {
i > 0 && T[i - 1] == LSType::L && T[i] == LSType::S
}
fn induce_L(
sigma: &Sigma,
S: &[u32],
SA: &mut [usize],
T: &[LSType],
buckets: &mut Vec<usize>,
) -> Result<(), Error> {
sigma.bucket_starts(buckets)?;
for i in 0..S.len() {
let j = SA[i].wrapping_sub(1);
if j < usize::MAX - 1 && T[j] == LSType::L {
SA[buckets[S[j] as usize]] = j;
buckets[S[j] as usize] += 1;
}
}
Ok(())
}
fn induce_S(
sigma: &Sigma,
S: &[u32],
SA: &mut [usize],
T: &[LSType],
buckets: &mut Vec<usize>,
) -> Result<(), Error> {
sigma.bucket_limits(buckets)?;
for i in (0..S.len()).rev() {
let j = SA[i].wrapping_sub(1);
if j < usize::MAX - 1 && T[j] == LSType::S {
buckets[S[j] as usize] -= 1;
SA[buckets[S[j] as usize]] = j;
}
}
Ok(())
}
pub fn sais(sigma: &Sigma, S: &[u32], SA: &mut [usize]) -> Result<(), Error> {
assert!(sigma.K() <= S.len());
assert!(S.len() == SA.len());
assert!(!S.is_empty());
assert_eq!(S[S.len() - 1], 0);
let T = get_types(S);
let T: &[LSType] = &T;
let mut buckets = vec![0usize; sigma.K()];
sigma.bucket_limits(&mut buckets)?;
for item in SA.iter_mut().take(S.len()) {
*item = usize::MAX;
}
for i in 0..S.len() {
if is_lms(T, i) {
buckets[S[i] as usize] -= 1;
SA[buckets[S[i] as usize]] = i;
}
}
induce_L(sigma, S, SA, T, &mut buckets)?;
induce_S(sigma, S, SA, T, &mut buckets)?;
let mut substrings = 0usize;
for i in 0..S.len() {
if is_lms(T, SA[i]) {
SA[substrings] = SA[i];
substrings += 1;
}
}
for item in SA.iter_mut().take(S.len()).skip(substrings) {
*item = usize::MAX;
}
let mut name = 0;
let mut prev = usize::MAX;
for i in 0..substrings {
let pos: usize = SA[i];
let mut diff = false;
for d in 0..S.len() {
if prev == usize::MAX || S[pos + d] != S[prev + d] || T[pos + d] != T[prev + d] {
diff = true;
break;
} else if d > 0 && (is_lms(T, pos + d) || is_lms(T, prev + d)) {
break;
}
}
if diff {
name += 1;
prev = pos;
}
let pos = pos / 2;
SA[substrings + pos] = name - 1
}
let mut j = S.len() - 1;
for i in (substrings..S.len()).rev() {
if SA[i] != usize::MAX {
SA[j] = SA[i];
j -= 1;
}
}
let (SA1, S1) = SA.split_at_mut(substrings);
let (_, S1) = S1.split_at_mut(S1.len() - substrings);
if name < substrings {
if S1.iter().any(|x| *x as u64 > u32::MAX as u64) {
return Err(Error::TextTooLong);
}
let S1: Vec<u32> = S1.iter().map(|x| *x as u32).collect();
let mut buf = Vec::new();
let mut builder = Builder::new(&mut buf);
Sigma::construct(S1[..S1.len() - 1].iter().copied(), &mut builder)?;
drop(builder);
let sigma1 = Sigma::unpack(&buf)
.expect("freshly constructed sigma should unpack")
.0;
sais(&sigma1, &S1, SA1)?;
} else {
for i in 0..S1.len() {
SA1[S1[i]] = i;
}
}
let mut j = 0;
for i in 0..S.len() {
if is_lms(T, i) {
S1[j] = i;
j += 1;
}
}
for i in 0..substrings {
SA1[i] = S1[SA1[i]];
}
for item in SA.iter_mut().take(S.len()).skip(substrings) {
*item = usize::MAX;
}
sigma.bucket_limits(&mut buckets)?;
for i in (0..substrings).rev() {
j = SA[i];
SA[i] = usize::MAX;
buckets[S[j] as usize] -= 1;
SA[buckets[S[j] as usize]] = j;
}
induce_L(sigma, S, SA, T, &mut buckets)?;
induce_S(sigma, S, SA, T, &mut buckets)?;
Ok(())
}
#[cfg(test)]
mod tests {
use buffertk::Unpackable;
use super::super::test_cases_for;
use super::super::test_util::TestCase;
use super::*;
fn check_get_types(t: &TestCase) {
let types: Vec<LSType> = t
.lstype
.chars()
.map(|c| if c == 'L' { LSType::L } else { LSType::S })
.collect();
let returned = get_types(t.S);
assert_eq!(&types, &returned);
}
test_cases_for!(get_types, super::check_get_types);
fn check_is_lms(t: &TestCase) {
let types = get_types(t.S);
let returned: String = types
.iter()
.enumerate()
.map(|(i, _)| if is_lms(&types, i) { '*' } else { ' ' })
.collect();
assert_eq!(t.lmspos, returned);
}
test_cases_for!(is_lms, super::check_is_lms);
fn check_sais(t: &TestCase) {
let sigma = t.sigma();
let sigma = Sigma::unpack(&sigma).expect("test should unpack").0;
let mut SA = vec![0usize; t.S.len()];
sais(&sigma, t.S, &mut SA).unwrap();
assert_eq!(t.SA, SA);
}
test_cases_for!(sais, super::check_sais);
}