use std::{collections::HashMap, fmt::Display, hash::Hash};
use crate::{
bfs_queues::BfsQueue,
character_sets::*,
compact_tables::{CompactTable, CompactTableBuilder},
errors::*,
minimizer::Minimizer,
partitions::Partition,
smt_strings::SmtString,
};
#[derive(Debug)]
pub struct Automaton {
num_states: usize,
num_final_states: usize,
initial_state: usize,
states: Box<[State]>,
}
#[derive(Debug)]
pub struct State {
id: usize,
is_final: bool,
classes: CharPartition,
successor: Box<[usize]>,
default_successor: Option<usize>,
}
#[derive(Debug)]
pub struct EdgeIterator<'a> {
state_array: &'a [State],
state: &'a State,
index: usize,
}
#[derive(Debug)]
pub struct FinalStateIterator<'a> {
state_array: &'a [State],
index: usize,
}
#[derive(Debug)]
struct StateMapping {
new_id: Vec<usize>,
old_id: Vec<usize>,
}
impl StateMapping {
fn from_array(num_nodes: usize, nodes_to_keep: &[usize]) -> Self {
let mut new_id = vec![0; num_nodes];
let num_new_nodes = nodes_to_keep.len();
let mut old_id = vec![0; num_new_nodes];
for (i, &node_id) in nodes_to_keep.iter().enumerate() {
new_id[node_id] = i;
old_id[i] = node_id;
}
StateMapping { new_id, old_id }
}
fn from_partition(p: &Partition) -> Self {
let num_nodes = p.size() as usize;
let mut new_id = vec![0; num_nodes];
let num_new_nodes = p.index() as usize;
let mut old_id = vec![0; num_new_nodes];
for s in 0..p.size() {
new_id[s as usize] = (p.block_id(s) - 1) as usize;
}
for b in 1..p.num_blocks() {
let s = p.pick_element(b);
old_id[b as usize - 1] = s as usize;
}
StateMapping { new_id, old_id }
}
fn num_new_states(&self) -> usize {
self.old_id.len()
}
fn is_class_rep(&self, id: usize) -> bool {
self.old_id[self.new_id[id]] == id
}
}
impl Automaton {
pub fn initial_state(&self) -> &State {
&self.states[self.initial_state]
}
pub fn state(&self, id: usize) -> &State {
&self.states[id]
}
pub fn num_states(&self) -> usize {
self.num_states
}
pub fn num_final_states(&self) -> usize {
self.num_final_states
}
pub fn default_successor(&self, s: &State) -> Option<&State> {
s.default_successor.map(|i| &self.states[i])
}
pub fn class_next(&self, s: &State, cid: ClassId) -> &State {
debug_assert!(s.valid_class_id(cid));
let i = match cid {
ClassId::Interval(i) => s.successor[i],
ClassId::Complement => s.default_successor.unwrap(),
};
&self.states[i]
}
pub fn char_set_next(&self, s: &State, set: &CharSet) -> Result<&State, Error> {
let cid = s.classes.class_of_set(set)?;
Ok(self.class_next(s, cid))
}
pub fn next(&self, s: &State, c: u32) -> &State {
let cid = s.classes.class_of_char(c);
self.class_next(s, cid)
}
pub fn str_next<'a>(&'a self, s: &'a State, str: &SmtString) -> &'a State {
str.iter().fold(s, |s1, c| self.next(s1, *c))
}
pub fn accepts(&self, str: &SmtString) -> bool {
self.str_next(self.initial_state(), str).is_final
}
pub fn states(&self) -> impl Iterator<Item = &State> {
self.states.iter()
}
pub fn edges<'a>(&'a self, s: &'a State) -> EdgeIterator<'a> {
EdgeIterator {
state_array: &self.states,
state: s,
index: 0,
}
}
pub fn final_states(&self) -> FinalStateIterator<'_> {
FinalStateIterator {
state_array: &self.states,
index: 0,
}
}
pub fn combined_char_partition(&self) -> CharPartition {
merge_partition_list(self.states().map(|x| &x.classes))
}
pub fn pick_alphabet(&self) -> Vec<u32> {
self.combined_char_partition().picks().collect()
}
pub fn compile_successors(&self) -> CompactTable {
let alphabet = self.pick_alphabet();
let num_states = self.num_states as u32;
let alpha_size = alphabet.len() as u32;
let mut builder = CompactTableBuilder::new(num_states, alpha_size);
for s in self.states() {
let id = s.id as u32;
if s.has_default_successor() {
let d = s.default_successor.unwrap();
builder.set_default(id, d as u32);
}
let suc: Vec<(u32, u32)> = alphabet
.iter()
.enumerate()
.filter(|(_, &c)| !s.char_maps_to_default(c))
.map(|(i, &c)| (i as u32, self.next(s, c).id as u32))
.collect();
builder.set_successors(id, &suc);
}
builder.build()
}
fn remap_nodes(&mut self, remap: &StateMapping) {
let i = self.initial_state;
self.initial_state = remap.new_id[i];
self.num_final_states = 0;
let num_new_nodes = remap.num_new_states();
let mut new_states = Vec::with_capacity(num_new_nodes);
for i in 0..num_new_nodes {
let old_id = remap.old_id[i];
let s = &mut self.states[old_id];
new_states.push(s.remap_nodes(remap));
debug_assert!(new_states[i].id == i);
if s.is_final {
debug_assert!(new_states[i].is_final);
self.num_final_states += 1;
}
}
self.num_states = num_new_nodes;
self.states = new_states.into();
}
pub fn remove_unreachable_states(&mut self) {
let mut queue = BfsQueue::new();
let mut reachable = Vec::new();
queue.push(self.initial_state);
while let Some(i) = queue.pop() {
reachable.push(i);
let s = self.state(i);
for (_, next) in self.edges(s) {
queue.push(next.id);
}
}
reachable.sort_unstable();
let remap = StateMapping::from_array(self.num_states, &reachable);
self.remap_nodes(&remap);
}
pub fn minimize(&mut self) {
let is_final = |i: u32| self.state(i as usize).is_final;
let transition_map = self.compile_successors();
let delta = |i, j| transition_map.eval(i, j);
let num_states = self.num_states as u32;
let alphabet_size = transition_map.alphabet_size() as u32;
let mut minimizer = Minimizer::new(num_states, alphabet_size, delta, is_final);
let p = minimizer.refine();
if (p.index() as usize) < self.num_states {
let remap = StateMapping::from_partition(p);
self.remap_nodes(&remap)
}
}
#[cfg(test)]
pub fn test_minimizer(&self) {
let is_final = |i: u32| self.state(i as usize).is_final;
let transition_map = self.compile_successors();
let delta = |i, j| transition_map.eval(i, j);
let num_states = self.num_states as u32;
let alphabet_size = transition_map.alphabet_size() as u32;
let mut minimizer = Minimizer::new(num_states, alphabet_size, delta, is_final);
minimizer.refine_and_trace();
}
}
impl State {
pub fn id(&self) -> usize {
self.id
}
pub fn is_final(&self) -> bool {
self.is_final
}
pub fn num_successors(&self) -> usize {
self.classes.len()
}
pub fn has_default_successor(&self) -> bool {
self.default_successor.is_some()
}
pub fn default_successor(&self) -> Option<usize> {
self.default_successor
}
pub fn valid_class_id(&self, cid: ClassId) -> bool {
self.classes.valid_class_id(cid)
}
pub fn char_maps_to_default(&self, c: u32) -> bool {
self.has_default_successor() && self.classes.class_of_char(c) == ClassId::Complement
}
pub fn char_classes(&self) -> ClassIdIterator<'_> {
self.classes.class_ids()
}
pub fn class_of_char(&self, x: u32) -> ClassId {
self.classes.class_of_char(x)
}
pub fn char_picks(&self) -> PickIterator<'_> {
self.classes.picks()
}
pub fn char_ranges(&self) -> impl Iterator<Item = &CharSet> {
self.classes.ranges()
}
fn remap_nodes(&mut self, remap: &StateMapping) -> Self {
let old_id = self.id;
debug_assert!(remap.is_class_rep(old_id));
let new_id = remap.new_id[old_id];
let new_default = self.default_successor.map(|i| remap.new_id[i]);
let mut new_successors = std::mem::take(&mut self.successor);
for s in new_successors.as_mut() {
*s = remap.new_id[*s];
}
let new_classes = std::mem::take(&mut self.classes);
State {
id: new_id,
is_final: self.is_final,
default_successor: new_default,
successor: new_successors,
classes: new_classes,
}
}
}
impl Display for State {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "s{}", self.id)
}
}
impl Display for Automaton {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fn plural(n: usize) -> &'static str {
if n == 1 {
""
} else {
"s"
}
}
writeln!(f, "{} states", self.num_states)?;
writeln!(f, "initial state: {}", self.initial_state())?;
write!(f, "final state{}:", plural(self.num_final_states))?;
for s in self.final_states() {
write!(f, " {}", &s)?;
}
writeln!(f)?;
writeln!(f, "transitions:")?;
for s in self.states.iter() {
for c in s.char_ranges() {
let d = self.char_set_next(s, c).unwrap();
writeln!(f, " \u{03B4}({s}, {c}) = {d}")?;
}
if s.has_default_successor() {
let d = s.default_successor().unwrap();
writeln!(f, " \u{03B4}({s}, ...) = s{d}")?;
}
}
writeln!(f)?;
Ok(())
}
}
impl<'a> Iterator for EdgeIterator<'a> {
type Item = (ClassId, &'a State);
fn next(&mut self) -> Option<Self::Item> {
let i = self.index;
let source = self.state;
if i < source.num_successors() {
self.index += 1;
let next_id = source.successor[i];
Some((ClassId::Interval(i), &self.state_array[next_id]))
} else if i == source.num_successors() && source.has_default_successor() {
self.index += 1;
let next_id = source.default_successor.unwrap();
Some((ClassId::Complement, &self.state_array[next_id]))
} else {
None
}
}
}
impl<'a> Iterator for FinalStateIterator<'a> {
type Item = &'a State;
fn next(&mut self) -> Option<Self::Item> {
let mut i = self.index;
let a = self.state_array;
while i < a.len() {
if a[i].is_final {
self.index = i + 1;
return Some(&a[i]);
}
i += 1;
}
self.index = i;
None
}
}
#[derive(Debug)]
struct StateInConstruction {
is_final: bool,
default_successor: Option<usize>,
transitions: Vec<(CharSet, usize)>,
}
impl StateInConstruction {
fn new() -> StateInConstruction {
StateInConstruction {
is_final: false,
default_successor: None,
transitions: Vec::new(),
}
}
fn set_default_successor(&mut self, next_id: usize) {
self.default_successor = Some(next_id);
}
fn add_transition(&mut self, set: &CharSet, next_id: usize) {
self.transitions.push((*set, next_id))
}
fn choose_default_successor(&mut self) {
fn maj_candidate(s: &[(CharSet, usize)]) -> usize {
let mut maj = s[0].1;
let mut k = 1;
for (_, x) in &s[1..] {
let x = *x;
if k == 0 {
maj = x;
k = 1;
} else if x == maj {
k += 1;
} else {
k -= 1;
}
}
maj
}
fn count(s: &[(CharSet, usize)], m: usize) -> usize {
let mut n = 0;
for (_, x) in s {
if *x == m {
n += 1;
}
}
n
}
if self.default_successor.is_none() && !self.transitions.is_empty() {
let m = maj_candidate(&self.transitions);
let n = count(&self.transitions, m);
if n >= self.transitions.len() / 2 {
self.set_default_successor(m);
}
}
}
fn remove_transitions_to_default(&mut self) {
if let Some(i) = self.default_successor {
self.transitions.retain(|x| x.1 != i)
}
}
fn cleanup(&mut self) {
self.choose_default_successor();
self.remove_transitions_to_default();
}
fn make_partition(&self) -> Result<CharPartition, Error> {
CharPartition::try_from_iter(self.transitions.iter().map(|x| x.0))
}
fn make_successor(&self, p: &CharPartition) -> Box<[usize]> {
let n = p.len();
let mut result = vec![0; n];
for s in &self.transitions {
let c = s.0.pick();
if let ClassId::Interval(i) = p.class_of_char(c) {
result[i] = s.1;
} else {
panic!();
}
}
result.into()
}
}
#[derive(Debug)]
pub struct AutomatonBuilder<T> {
size: usize,
id_map: HashMap<T, usize>,
states: Vec<StateInConstruction>,
}
impl<T: Eq + Hash + Clone> AutomatonBuilder<T> {
fn get_state_id(&mut self, state: &T) -> usize {
match self.id_map.get(state) {
Some(i) => *i,
None => {
let i = self.size;
let new_state = StateInConstruction::new();
self.states.push(new_state);
self.id_map.insert(state.clone(), i);
self.size += 1;
i
}
}
}
pub fn new(initial_state: &T) -> Self {
let mut new = AutomatonBuilder {
size: 0,
id_map: HashMap::new(),
states: Vec::new(),
};
new.get_state_id(initial_state);
new
}
pub fn mark_final(&mut self, state: &T) -> &mut Self {
let i = self.get_state_id(state);
self.states[i].is_final = true;
self
}
pub fn set_default_successor(&mut self, state: &T, next: &T) -> &mut Self {
let i = self.get_state_id(state);
let j = self.get_state_id(next);
self.states[i].set_default_successor(j);
self
}
pub fn add_transition(&mut self, state: &T, set: &CharSet, next: &T) -> &mut Self {
let i = self.get_state_id(state);
let j = self.get_state_id(next);
self.states[i].add_transition(set, j);
self
}
pub fn build(&mut self) -> Result<Automaton, Error> {
let n = self.size;
let mut num_final_states = 0;
let mut state_array = Vec::with_capacity(n);
for (i, s) in self.states.iter_mut().enumerate() {
s.cleanup();
let p = s.make_partition()?;
if s.default_successor.is_some() && p.empty_complement() {
return Err(Error::EmptyComplementaryClass);
}
if s.default_successor.is_none() && !p.empty_complement() {
return Err(Error::MissingDefaultSuccessor);
}
let successor = s.make_successor(&p);
if s.is_final {
num_final_states += 1;
}
let new_state = State {
id: i,
is_final: s.is_final,
successor,
classes: p,
default_successor: s.default_successor,
};
state_array.push(new_state);
}
Ok(Automaton {
num_states: n,
num_final_states,
initial_state: 0,
states: state_array.into(),
})
}
pub fn build_unchecked(&mut self) -> Automaton {
let num_states = self.size;
let mut num_final_states = 0;
let mut state_array = Vec::with_capacity(num_states);
for (i, s) in self.states.iter_mut().enumerate() {
s.cleanup();
let p = s.make_partition().unwrap();
let successor = s.make_successor(&p);
if s.is_final {
num_final_states += 1;
}
state_array.push(State {
id: i,
is_final: s.is_final,
successor,
classes: p,
default_successor: s.default_successor,
})
}
Automaton {
num_states,
num_final_states,
initial_state: 0,
states: state_array.into(),
}
}
}
#[cfg(test)]
mod test {
use crate::smt_strings::char_to_smt;
use super::*;
fn graph() -> Vec<(u32, char, u32)> {
vec![
(0, 'a', 0),
(0, 'b', 1),
(0, 'c', 2),
(1, 'a', 3),
(1, 'c', 2),
(2, 'b', 3),
(2, 'c', 3),
(3, 'a', 0),
(3, 'b', 1),
(3, 'c', 3),
]
}
fn check_automaton(automaton: &Automaton) {
assert_eq!(automaton.num_states(), 5);
assert_eq!(automaton.initial_state().id, 0);
assert_eq!(automaton.num_final_states(), 1);
let s0 = automaton.state(0);
assert_eq!(automaton.next(s0, 'a' as u32).id, 0);
assert_eq!(automaton.next(s0, 'b' as u32).id, 1);
assert_eq!(automaton.next(s0, 'c' as u32).id, 2);
assert_eq!(automaton.next(s0, 'd' as u32).id, 4);
assert!(!s0.is_final());
let s1 = automaton.state(1);
assert_eq!(automaton.next(s1, 'a' as u32).id, 3);
assert_eq!(automaton.next(s1, 'c' as u32).id, 2);
assert_eq!(automaton.next(s1, 'd' as u32).id, 4);
assert!(!s1.is_final());
let s2 = automaton.state(2);
assert_eq!(automaton.next(s2, 'a' as u32).id, 4);
assert_eq!(automaton.next(s2, 'b' as u32).id, 3);
assert_eq!(automaton.next(s2, 'c' as u32).id, 3);
assert_eq!(automaton.next(s2, 'd' as u32).id, 4);
assert!(!s2.is_final());
let s3 = automaton.state(3);
assert_eq!(automaton.next(s3, 'a' as u32).id, 0);
assert_eq!(automaton.next(s3, 'b' as u32).id, 1);
assert_eq!(automaton.next(s3, 'c' as u32).id, 3);
assert_eq!(automaton.next(s3, 'd' as u32).id, 4);
assert!(s3.is_final());
}
#[test]
fn test_builder() {
let g = graph();
let mut builder = AutomatonBuilder::new(&0);
for (source, label, dest) in &g {
builder.add_transition(source, &CharSet::singleton(*label as u32), dest);
}
builder
.set_default_successor(&0, &4)
.set_default_successor(&1, &4)
.set_default_successor(&2, &4)
.set_default_successor(&3, &4)
.set_default_successor(&4, &4)
.mark_final(&3);
let automaton = builder.build().unwrap();
println!(
"Result automaton: {} states, initial state: {}",
automaton.num_states(),
automaton.initial_state()
);
for state in automaton.states.iter() {
println!("State {state}");
if state.is_final() {
println!(" final");
}
match state.default_successor() {
None => println!(" no default successor"),
Some(x) => println!(" default successor: {x}"),
}
println!(" transitions:");
for (cid, next) in automaton.edges(state) {
println!(" delta({state}, {cid}) = {next}");
}
}
println!("{automaton}");
check_automaton(&automaton);
let map = automaton.compile_successors();
let alphabet = automaton.pick_alphabet();
print!("Alphabet:");
for c in &alphabet {
print!(" {}", char_to_smt(*c));
}
println!();
let alphabet_size = alphabet.len() as u32;
for s in 0..automaton.num_states() as u32 {
for c in 0..alphabet_size {
let next = map.eval(s, c);
let char = alphabet[c as usize];
println!(
" d({}, {}) = {} <=> \u{03B4}(s{}, {}) = s{}",
s,
c,
next,
s,
char_to_smt(char),
next
);
}
println!();
}
println!("Compact transition map: {map}");
automaton.test_minimizer();
}
#[test]
fn test_remove_unreachable() {
let g = graph();
let mut builder = AutomatonBuilder::new(&0);
builder.add_transition(&5, &CharSet::singleton('z' as u32), &6);
builder.set_default_successor(&5, &8);
builder.add_transition(&6, &CharSet::singleton('y' as u32), &5);
builder.set_default_successor(&6, &8);
builder.set_default_successor(&8, &5);
builder.mark_final(&6);
for (source, label, dest) in &g {
builder.add_transition(source, &CharSet::singleton(*label as u32), dest);
}
builder
.set_default_successor(&0, &4)
.set_default_successor(&1, &4)
.set_default_successor(&2, &4)
.set_default_successor(&3, &4)
.set_default_successor(&4, &4)
.mark_final(&3);
let mut automaton = builder.build().unwrap();
println!(
"Result automaton: {} states, initial state: {}",
automaton.num_states(),
automaton.initial_state()
);
assert_eq!(automaton.num_states(), 8);
println!("{automaton}");
automaton.remove_unreachable_states();
println!(
"After removing unreachable states: {} states, initial state: {}",
automaton.num_states(),
automaton.initial_state()
);
println!("{automaton}");
check_automaton(&automaton);
}
#[test]
fn test_minimizer() {
let builder = &mut AutomatonBuilder::new(&0);
builder.add_transition(&0, &CharSet::singleton('a' as u32), &1);
builder.add_transition(&1, &CharSet::singleton('b' as u32), &2);
builder.add_transition(&2, &CharSet::singleton('c' as u32), &3);
builder.add_transition(&3, &CharSet::singleton('a' as u32), &4);
builder.mark_final(&3);
builder.add_transition(&4, &CharSet::singleton('a' as u32), &5);
builder.mark_final(&4);
builder.add_transition(&5, &CharSet::singleton('a' as u32), &3);
builder.mark_final(&5);
builder.set_default_successor(&0, &6);
builder.set_default_successor(&1, &7);
builder.set_default_successor(&2, &7);
builder.set_default_successor(&3, &8);
builder.set_default_successor(&4, &6);
builder.set_default_successor(&5, &7);
builder.set_default_successor(&6, &7);
builder.set_default_successor(&7, &8);
builder.set_default_successor(&8, &6);
let automaton = &mut builder.build().unwrap();
println!(
"Result automaton: {} states, initial state: {}",
automaton.num_states(),
automaton.initial_state()
);
println!("{automaton}");
automaton.test_minimizer();
automaton.minimize();
println!(
"\nAfter minimization: {} states, initial state: {}",
automaton.num_states(),
automaton.initial_state(),
);
println!("{automaton}");
}
}