use std::collections::{BTreeSet, HashMap, HashSet};
use std::hash::Hash;
use std::marker::PhantomData;
use std::sync::{Mutex, MutexGuard, OnceLock};
use std::{fmt, io};
static NEXT_STATE_ID: OnceLock<Mutex<usize>> = OnceLock::new();
fn next_state_id() -> MutexGuard<'static, usize> {
NEXT_STATE_ID
.get_or_init(|| Mutex::new(0))
.lock()
.expect("failed to acquire the next state ID")
}
fn get_and_update_state_id() -> usize {
let mut id = next_state_id();
let cur = *id;
*id += 1;
cur
}
pub trait State<S> {
fn new() -> Self;
fn add(&mut self, sym: S, state: usize);
fn dump<W>(&self, writer: &mut W, id: usize) -> io::Result<()>
where
S: fmt::Debug,
W: io::Write;
}
#[derive(Debug)]
pub struct DenseState<S> {
outs: Vec<(S, usize)>,
}
impl<S> DenseState<S> {
pub fn outs(&self) -> &[(S, usize)] {
&self.outs
}
pub fn next_state(&self, sym: &S) -> Option<usize>
where
S: PartialEq,
{
self
.outs
.iter()
.find_map(|(s, id)| (s == sym).then_some(*id))
}
}
impl<S> State<S> for DenseState<S> {
fn new() -> Self {
Self { outs: Vec::new() }
}
fn add(&mut self, sym: S, state: usize) {
self.outs.push((sym, state));
}
fn dump<W>(&self, writer: &mut W, id: usize) -> io::Result<()>
where
S: fmt::Debug,
W: io::Write,
{
for (s, to) in &self.outs {
writeln!(writer, " {id} -> {to} [label = \"{s:?}\"]")?;
}
Ok(())
}
}
#[derive(Debug)]
pub struct MultiState<S> {
outs: HashMap<S, HashSet<usize>>,
}
impl<S> MultiState<S> {
pub fn outs(&self) -> &HashMap<S, HashSet<usize>> {
&self.outs
}
}
impl<S> State<S> for MultiState<S>
where
S: Eq + Hash,
{
fn new() -> Self {
Self {
outs: HashMap::new(),
}
}
fn add(&mut self, sym: S, state: usize) {
self.outs.entry(sym).or_default().insert(state);
}
fn dump<W>(&self, writer: &mut W, id: usize) -> io::Result<()>
where
S: fmt::Debug,
W: io::Write,
{
for (s, to_ids) in &self.outs {
for to in to_ids {
writeln!(writer, " {id} -> {to} [label = \"{s:?}\"]")?;
}
}
Ok(())
}
}
#[derive(Debug)]
pub struct FiniteAutomaton<Sym, State: self::State<Sym>> {
states: HashMap<usize, State>,
init: usize,
finals: HashSet<usize>,
sym: PhantomData<Sym>,
}
impl<Sym, State: self::State<Sym>> FiniteAutomaton<Sym, State> {
pub fn new() -> Self {
let init = get_and_update_state_id();
Self {
states: [(init, State::new())].into(),
init,
finals: HashSet::new(),
sym: PhantomData,
}
}
pub fn add_state(&mut self) -> usize {
let id = get_and_update_state_id();
self.states.insert(id, State::new());
id
}
pub fn add_final_state(&mut self) -> usize {
let id = self.add_state();
self.finals.insert(id);
id
}
pub fn set_final_state(&mut self, id: usize) -> bool {
if self.states.contains_key(&id) {
self.finals.insert(id);
true
} else {
false
}
}
pub fn set_normal_state(&mut self, id: usize) -> bool {
if self.states.contains_key(&id) {
self.finals.remove(&id);
true
} else {
false
}
}
pub fn union(&mut self, fa: Self) {
self.states.extend(fa.states);
self.finals.extend(fa.finals);
}
pub fn states(&self) -> &HashMap<usize, State> {
&self.states
}
pub fn state(&self, id: usize) -> Option<&State> {
self.states.get(&id)
}
pub fn state_mut(&mut self, id: usize) -> Option<&mut State> {
self.states.get_mut(&id)
}
pub fn init(&self) -> &State {
self.states.get(&self.init).unwrap()
}
pub fn init_mut(&mut self) -> &mut State {
self.states.get_mut(&self.init).unwrap()
}
pub fn init_id(&self) -> usize {
self.init
}
pub fn finals(&self) -> &HashSet<usize> {
&self.finals
}
pub fn final_id(&self) -> Option<usize> {
if self.finals().len() > 1 {
None
} else {
self.finals().iter().next().copied()
}
}
pub fn dump<W>(&self, writer: &mut W) -> io::Result<()>
where
Sym: fmt::Debug,
W: io::Write,
{
writeln!(writer, "digraph finite_automaton {{")?;
writeln!(writer, " rankdir = LR")?;
writeln!(writer, " node [shape = doublecircle];")?;
write!(writer, " ")?;
for id in &self.finals {
write!(writer, " {id}")?;
}
writeln!(writer, ";")?;
writeln!(writer, " node [shape = circle];")?;
for (id, state) in &self.states {
state.dump(writer, *id)?;
}
writeln!(writer, "}}")?;
Ok(())
}
}
impl<Sym, State: self::State<Sym>> Default for FiniteAutomaton<Sym, State> {
fn default() -> Self {
Self::new()
}
}
pub type DenseFA<S> = FiniteAutomaton<S, DenseState<S>>;
pub type MultiFA<S> = FiniteAutomaton<S, MultiState<S>>;
pub struct ClosureBuilder<S> {
empty_edges: HashMap<usize, HashSet<usize>>,
normal_edges: HashMap<usize, MultiState<S>>,
}
impl<S> From<MultiFA<Option<S>>> for ClosureBuilder<S>
where
S: Eq + Hash,
{
fn from(fa: MultiFA<Option<S>>) -> Self {
let mut empty_edges = HashMap::new();
let mut normal_edges: HashMap<_, MultiState<S>> = HashMap::new();
for (id, s) in fa.states {
for (s, to) in s.outs {
match s {
Some(s) => normal_edges
.entry(id)
.or_insert_with(|| State::new())
.outs
.insert(s, to),
None => empty_edges.insert(id, to),
};
}
}
Self {
empty_edges,
normal_edges,
}
}
}
impl<S> ClosureBuilder<S> {
pub fn symbol_set(&self) -> HashSet<S>
where
S: Clone + Eq + Hash,
{
self
.normal_edges
.values()
.flat_map(|s| s.outs().keys().cloned())
.collect()
}
pub fn epsilon_closure<Ids>(&self, cached: &mut CachedClosures, ids: Ids) -> Closure
where
Ids: Into<Closure>,
{
let mut closure = ids.into();
if closure.is_empty() {
closure
} else if let Some(c) = cached.get(&closure) {
c.clone()
} else {
let ids = closure.clone();
let mut next_ids: Vec<_> = closure.iter().copied().collect();
while let Some(id) = next_ids.pop() {
if let Some(to_ids) = self.empty_edges.get(&id) {
for id in to_ids {
if closure.insert(*id) {
next_ids.push(*id);
}
}
}
}
cached.insert(ids, closure.clone());
closure
}
}
pub fn state_closure(&self, cached: &mut CachedClosures, states: &Closure, s: &S) -> Closure
where
S: Eq + Hash,
{
let mut next_states = Closure::new();
for id in states {
if let Some(ids) = self.normal_edges.get(id).and_then(|st| st.outs().get(s)) {
next_states.extend(ids);
}
}
self.epsilon_closure(cached, next_states)
}
}
pub type Closure = BTreeSet<usize>;
pub type CachedClosures = HashMap<Closure, Closure>;