use std::collections::{HashMap, HashSet};
#[derive(Debug, Clone)]
struct State {
len: usize,
link: Option<usize>,
next: HashMap<char, usize>,
end_pos: HashSet<usize>,
is_terminal: bool,
}
impl State {
fn new(len: usize) -> Self {
Self {
len,
link: None,
next: HashMap::new(),
end_pos: HashSet::new(),
is_terminal: false,
}
}
fn add_pos(&mut self, p: usize) {
self.end_pos.insert(p);
}
}
#[derive(Debug)]
pub struct SuffixAutomaton {
states: Vec<State>,
last: usize,
}
impl SuffixAutomaton {
pub fn new(text: &str) -> Self {
let mut sa = Self {
states: vec![State::new(0)], last: 0,
};
for (i, ch) in text.chars().enumerate() {
sa.extend(ch, i);
}
sa.mark_terminals();
sa.propagate_positions();
sa
}
fn extend(&mut self, ch: char, pos: usize) {
let mut p = self.last;
let cur = self.states.len();
self.states.push(State::new(self.states[p].len + 1));
self.states[cur].add_pos(pos);
while p != usize::MAX && !self.states[p].next.contains_key(&ch) {
self.states[p].next.insert(ch, cur);
p = self.states[p].link.unwrap_or(usize::MAX);
}
if p == usize::MAX {
self.states[cur].link = Some(0);
} else {
let q = self.states[p].next[&ch];
if self.states[p].len + 1 == self.states[q].len {
self.states[cur].link = Some(q);
} else {
let clone = self.states.len();
self.states.push(State::new(self.states[p].len + 1));
self.states[clone].next = self.states[q].next.clone();
self.states[clone].link = self.states[q].link;
while p != usize::MAX && self.states[p].next.get(&ch) == Some(&q) {
self.states[p].next.insert(ch, clone);
p = self.states[p].link.unwrap_or(usize::MAX);
}
self.states[q].link = Some(clone);
self.states[cur].link = Some(clone);
}
}
self.last = cur;
}
fn mark_terminals(&mut self) {
let mut p = self.last;
while p != 0 {
self.states[p].is_terminal = true;
p = self.states[p].link.unwrap_or(0);
}
self.states[0].is_terminal = true;
}
fn propagate_positions(&mut self) {
let mut order: Vec<usize> = (0..self.states.len()).collect();
order.sort_by_key(|&i| self.states[i].len);
for &i in order.iter().rev() {
let positions: Vec<_> = self.states[i].end_pos.iter().copied().collect();
if let Some(link) = self.states[i].link {
for p in positions {
self.states[link].add_pos(p);
}
}
}
}
pub fn contains(&self, pattern: &str) -> bool {
let mut s = 0;
for ch in pattern.chars() {
match self.states[s].next.get(&ch) {
Some(&nx) => s = nx,
None => return false,
}
}
true
}
pub fn find_all(&self, pattern: &str) -> Vec<usize> {
if pattern.is_empty() {
return Vec::new();
}
let mut s = 0;
for ch in pattern.chars() {
match self.states[s].next.get(&ch) {
Some(&nx) => s = nx,
None => return Vec::new(),
}
}
let mut result: Vec<_> = self.states[s]
.end_pos
.iter()
.filter_map(|&ep| {
let pat_len = pattern.chars().count();
if ep + 1 >= pat_len {
Some(ep + 1 - pat_len)
} else {
None
}
})
.collect();
result.sort_unstable();
result.dedup();
result
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_basic_construction() {
let text = "banana";
let sa = SuffixAutomaton::new(text);
assert!(sa.contains("ana"));
assert!(sa.contains("ban"));
assert!(sa.contains("na"));
assert!(sa.contains("nan"));
}
#[test]
fn test_find_all() {
let text = "banana";
let sa = SuffixAutomaton::new(text);
assert_eq!(sa.find_all("ana"), vec![1, 3]);
assert_eq!(sa.find_all("na"), vec![2, 4]);
assert_eq!(sa.find_all("a"), vec![1, 3, 5]);
assert_eq!(sa.find_all("ban"), vec![0]);
assert_eq!(sa.find_all("xyz"), Vec::<usize>::new());
}
#[test]
fn test_empty_pattern() {
let text = "banana";
let sa = SuffixAutomaton::new(text);
assert!(sa.contains(""));
assert_eq!(sa.find_all(""), Vec::<usize>::new());
}
#[test]
fn test_unicode_text() {
let text = "こんにちは世界";
let sa = SuffixAutomaton::new(text);
assert!(sa.contains("にち"));
assert!(sa.contains("世界"));
assert!(!sa.contains("世に"));
assert_eq!(sa.find_all("にち"), vec![2]);
assert_eq!(sa.find_all("世界"), vec![5]);
}
#[test]
fn test_overlapping_patterns() {
let text = "aaaaa";
let sa = SuffixAutomaton::new(text);
assert_eq!(sa.find_all("aa"), vec![0, 1, 2, 3]);
assert_eq!(sa.find_all("aaa"), vec![0, 1, 2]);
}
#[test]
fn test_long_text() {
let text = "a".repeat(1000) + "b";
let sa = SuffixAutomaton::new(&text);
assert!(sa.contains("aaa"));
assert!(sa.contains("b"));
assert!(!sa.contains("c"));
let positions = sa.find_all("aa");
assert_eq!(positions.len(), 999);
}
#[test]
fn test_case_sensitivity() {
let text = "bAnAnA";
let sa = SuffixAutomaton::new(text);
assert!(!sa.contains("ana"));
assert!(sa.contains("AnA"));
assert_eq!(sa.find_all("ana"), Vec::<usize>::new());
assert_eq!(sa.find_all("AnA"), vec![1, 3]);
}
}