use alloc::collections::{BTreeMap, BTreeSet};
use alloc::vec;
use alloc::vec::Vec;
pub struct Nfa {
transitions: Vec<Vec<(u32, usize)>>,
epsilon: Vec<Vec<usize>>,
}
impl Nfa {
pub fn new() -> Self {
Self {
transitions: Vec::new(),
epsilon: Vec::new(),
}
}
pub fn add_state(&mut self) -> usize {
let id = self.transitions.len();
self.transitions.push(Vec::new());
self.epsilon.push(Vec::new());
id
}
pub fn add_transition(&mut self, from: usize, sym: u32, to: usize) {
self.transitions[from].push((sym, to));
}
pub fn add_epsilon(&mut self, from: usize, to: usize) {
self.epsilon[from].push(to);
}
pub fn num_states(&self) -> usize {
self.transitions.len()
}
pub fn transitions(&self) -> &[Vec<(u32, usize)>] {
&self.transitions
}
pub fn epsilons(&self) -> &[Vec<usize>] {
&self.epsilon
}
}
impl Default for Nfa {
fn default() -> Self {
Self::new()
}
}
pub struct Dfa {
pub transitions: Vec<Vec<(u32, usize)>>,
}
impl Dfa {
pub fn num_states(&self) -> usize {
self.transitions.len()
}
}
fn epsilon_closure(nfa: &Nfa, states: &BTreeSet<usize>) -> BTreeSet<usize> {
let mut result = states.clone();
let mut worklist: Vec<usize> = states.iter().copied().collect();
while let Some(s) = worklist.pop() {
for &target in &nfa.epsilon[s] {
if result.insert(target) {
worklist.push(target);
}
}
}
result
}
pub fn subset_construction(nfa: &Nfa) -> (Dfa, Vec<Vec<usize>>) {
let initial: BTreeSet<usize> = [0].into_iter().collect();
let initial_closed = epsilon_closure(nfa, &initial);
let mut dfa_nfa_sets: Vec<BTreeSet<usize>> = vec![initial_closed.clone()];
let mut state_index: BTreeMap<BTreeSet<usize>, usize> = BTreeMap::new();
state_index.insert(initial_closed, 0);
let mut transitions: Vec<Vec<(u32, usize)>> = vec![Vec::new()];
let mut worklist = vec![0usize];
while let Some(dfa_idx) = worklist.pop() {
let dfa_state = dfa_nfa_sets[dfa_idx].clone();
let mut symbol_targets: BTreeMap<u32, BTreeSet<usize>> = BTreeMap::new();
for &nfa_state in &dfa_state {
for &(sym, target) in &nfa.transitions[nfa_state] {
symbol_targets.entry(sym).or_default().insert(target);
}
}
for (sym, targets) in symbol_targets {
let closed = epsilon_closure(nfa, &targets);
if closed.is_empty() {
continue;
}
let target_idx = if let Some(&idx) = state_index.get(&closed) {
idx
} else {
let idx = dfa_nfa_sets.len();
state_index.insert(closed.clone(), idx);
dfa_nfa_sets.push(closed);
transitions.push(Vec::new());
worklist.push(idx);
idx
};
transitions[dfa_idx].push((sym, target_idx));
}
}
let nfa_sets: Vec<Vec<usize>> = dfa_nfa_sets
.into_iter()
.map(|s| s.into_iter().collect())
.collect();
(Dfa { transitions }, nfa_sets)
}
pub fn hopcroft_minimize(dfa: &Dfa, initial_partition: &[usize]) -> (Dfa, Vec<usize>) {
let mut partition_map: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
for (state, &p) in initial_partition.iter().enumerate() {
partition_map.entry(p).or_default().push(state);
}
let mut partition_ids: Vec<usize> = partition_map.keys().copied().collect();
partition_ids.sort();
let mut partitions: Vec<Vec<usize>> = partition_ids
.iter()
.map(|id| partition_map.remove(id).unwrap())
.collect();
let mut state_to_partition: Vec<usize> = vec![0; dfa.num_states()];
for (p_idx, partition) in partitions.iter().enumerate() {
for &s in partition {
state_to_partition[s] = p_idx;
}
}
let mut changed = true;
while changed {
changed = false;
let num_partitions = partitions.len();
let mut new_partitions: Vec<Vec<usize>> = Vec::new();
let mut new_state_to_partition: Vec<usize> = vec![0; dfa.num_states()];
for partition in &partitions {
if partition.len() <= 1 {
let p = new_partitions.len();
for &s in partition {
new_state_to_partition[s] = p;
}
new_partitions.push(partition.clone());
continue;
}
let mut signature_groups: BTreeMap<Vec<(u32, usize)>, Vec<usize>> = BTreeMap::new();
for &state in partition {
let mut sig: Vec<(u32, usize)> = dfa.transitions[state]
.iter()
.map(|&(sym, target)| (sym, state_to_partition[target]))
.collect();
sig.sort();
signature_groups.entry(sig).or_default().push(state);
}
if signature_groups.len() > 1 {
changed = true;
}
for (_, group) in signature_groups {
let p = new_partitions.len();
for &s in &group {
new_state_to_partition[s] = p;
}
new_partitions.push(group);
}
}
partitions = new_partitions;
state_to_partition = new_state_to_partition;
if partitions.len() == num_partitions && !changed {
break;
}
}
let num_min_states = partitions.len();
let mut min_transitions: Vec<Vec<(u32, usize)>> = vec![Vec::new(); num_min_states];
for (p_idx, partition) in partitions.iter().enumerate() {
let representative = partition[0];
for &(sym, target) in &dfa.transitions[representative] {
min_transitions[p_idx].push((sym, state_to_partition[target]));
}
}
let initial_partition_id = state_to_partition[0];
if initial_partition_id != 0 {
min_transitions.swap(0, initial_partition_id);
for row in &mut min_transitions {
for (_, target) in row.iter_mut() {
if *target == 0 {
*target = initial_partition_id;
} else if *target == initial_partition_id {
*target = 0;
}
}
}
}
let mut state_map = state_to_partition;
if initial_partition_id != 0 {
for s in state_map.iter_mut() {
if *s == 0 {
*s = initial_partition_id;
} else if *s == initial_partition_id {
*s = 0;
}
}
}
(
Dfa {
transitions: min_transitions,
},
state_map,
)
}
pub fn symbol_classes(dfa: &Dfa, num_symbols: usize) -> (Vec<u16>, usize) {
let mut col_map: BTreeMap<Vec<usize>, u16> = BTreeMap::new();
let mut class_map = vec![0u16; num_symbols];
let mut next_class = 0u16;
let mut table = vec![0usize; dfa.num_states() * num_symbols];
for (state, trans) in dfa.transitions.iter().enumerate() {
for &(sym, target) in trans {
table[state * num_symbols + sym as usize] = target + 1;
}
}
for sym in 0..num_symbols {
let col: Vec<usize> = (0..dfa.num_states())
.map(|s| table[s * num_symbols + sym])
.collect();
let class = col_map.entry(col).or_insert_with(|| {
let c = next_class;
next_class += 1;
c
});
class_map[sym] = *class;
}
(class_map, next_class as usize)
}