laps_regex/
fa.rs

1//! Finite automaton representations.
2//!
3//! This module contains [`FiniteAutomaton`], which is a simple finite
4//! automaton implementation, and [`State`], which represents a state in
5//! the automaton.
6
7use std::collections::{BTreeSet, HashMap, HashSet};
8use std::hash::Hash;
9use std::marker::PhantomData;
10use std::sync::{Mutex, MutexGuard, OnceLock};
11use std::{fmt, io};
12
13/// The next state ID.
14static NEXT_STATE_ID: OnceLock<Mutex<usize>> = OnceLock::new();
15
16/// Acquires and returns the next state ID.
17fn next_state_id() -> MutexGuard<'static, usize> {
18  NEXT_STATE_ID
19    .get_or_init(|| Mutex::new(0))
20    .lock()
21    .expect("failed to acquire the next state ID")
22}
23
24/// Returns a new state ID and updates the ID counter.
25fn get_and_update_state_id() -> usize {
26  let mut id = next_state_id();
27  let cur = *id;
28  *id += 1;
29  cur
30}
31
32/// Trait for state of finite automaton.
33pub trait State<S> {
34  /// Creates a new empty state.
35  fn new() -> Self;
36
37  /// Adds a new edge to the current state.
38  fn add(&mut self, sym: S, state: usize);
39
40  /// Dumps the current state to the given writer as Graphviz.
41  fn dump<W>(&self, writer: &mut W, id: usize) -> io::Result<()>
42  where
43    S: fmt::Debug,
44    W: io::Write;
45}
46
47/// A state of the finite automaton with symbol type `S`.
48///
49/// This state uses [`Vec`] to store edges internally.
50#[derive(Debug)]
51pub struct DenseState<S> {
52  outs: Vec<(S, usize)>,
53}
54
55impl<S> DenseState<S> {
56  /// Returns the output edges.
57  pub fn outs(&self) -> &[(S, usize)] {
58    &self.outs
59  }
60
61  /// Returns ID of the next state after accepting the given symbol `sym`.
62  ///
63  /// This method will return only the first matching state.
64  /// Returns [`None`] if no matching state.
65  pub fn next_state(&self, sym: &S) -> Option<usize>
66  where
67    S: PartialEq,
68  {
69    self
70      .outs
71      .iter()
72      .find_map(|(s, id)| (s == sym).then_some(*id))
73  }
74}
75
76impl<S> State<S> for DenseState<S> {
77  fn new() -> Self {
78    Self { outs: Vec::new() }
79  }
80
81  fn add(&mut self, sym: S, state: usize) {
82    self.outs.push((sym, state));
83  }
84
85  fn dump<W>(&self, writer: &mut W, id: usize) -> io::Result<()>
86  where
87    S: fmt::Debug,
88    W: io::Write,
89  {
90    for (s, to) in &self.outs {
91      writeln!(writer, "  {id} -> {to} [label = \"{s:?}\"]")?;
92    }
93    Ok(())
94  }
95}
96
97/// A state of the finite automaton with symbol type `S`.
98///
99/// This state uses [`HashMap<S, HashSet<_>>`] to store edges
100/// and all their output states.
101#[derive(Debug)]
102pub struct MultiState<S> {
103  outs: HashMap<S, HashSet<usize>>,
104}
105
106impl<S> MultiState<S> {
107  /// Returns the map of output edges.
108  pub fn outs(&self) -> &HashMap<S, HashSet<usize>> {
109    &self.outs
110  }
111}
112
113impl<S> State<S> for MultiState<S>
114where
115  S: Eq + Hash,
116{
117  fn new() -> Self {
118    Self {
119      outs: HashMap::new(),
120    }
121  }
122
123  fn add(&mut self, sym: S, state: usize) {
124    self.outs.entry(sym).or_default().insert(state);
125  }
126
127  fn dump<W>(&self, writer: &mut W, id: usize) -> io::Result<()>
128  where
129    S: fmt::Debug,
130    W: io::Write,
131  {
132    for (s, to_ids) in &self.outs {
133      for to in to_ids {
134        writeln!(writer, "  {id} -> {to} [label = \"{s:?}\"]")?;
135      }
136    }
137    Ok(())
138  }
139}
140
141/// A finite automaton with symbol type `S`.
142#[derive(Debug)]
143pub struct FiniteAutomaton<Sym, State: self::State<Sym>> {
144  states: HashMap<usize, State>,
145  init: usize,
146  finals: HashSet<usize>,
147  sym: PhantomData<Sym>,
148}
149
150impl<Sym, State: self::State<Sym>> FiniteAutomaton<Sym, State> {
151  /// Creates an empty finite automaton.
152  pub fn new() -> Self {
153    let init = get_and_update_state_id();
154    Self {
155      states: [(init, State::new())].into(),
156      init,
157      finals: HashSet::new(),
158      sym: PhantomData,
159    }
160  }
161
162  /// Creates a new state in the current finite automaton.
163  ///
164  /// Returns the state ID.
165  pub fn add_state(&mut self) -> usize {
166    let id = get_and_update_state_id();
167    self.states.insert(id, State::new());
168    id
169  }
170
171  /// Creates a new final state in the current finite automaton.
172  ///
173  /// Returns the state ID.
174  pub fn add_final_state(&mut self) -> usize {
175    let id = self.add_state();
176    self.finals.insert(id);
177    id
178  }
179
180  /// Sets the given state as a final state.
181  ///
182  /// Returns [`false`](bool) if the given state does not exist.
183  pub fn set_final_state(&mut self, id: usize) -> bool {
184    if self.states.contains_key(&id) {
185      self.finals.insert(id);
186      true
187    } else {
188      false
189    }
190  }
191
192  /// Sets the given state as a normal state.
193  ///
194  /// Returns [`false`](bool) if the given state does not exist.
195  pub fn set_normal_state(&mut self, id: usize) -> bool {
196    if self.states.contains_key(&id) {
197      self.finals.remove(&id);
198      true
199    } else {
200      false
201    }
202  }
203
204  /// Unions the current finite automaton with the given finite automaton.
205  ///
206  /// The initial state of the given finite automaton will be added to
207  /// the current finite automaton as normal states. All final states of
208  /// the given finite automaton will be kept.
209  pub fn union(&mut self, fa: Self) {
210    self.states.extend(fa.states);
211    self.finals.extend(fa.finals);
212  }
213
214  /// Returns a reference to the state map.
215  pub fn states(&self) -> &HashMap<usize, State> {
216    &self.states
217  }
218
219  /// Returns a reference to the given state.
220  ///
221  /// Returns [`None`] if the given state does not exist.
222  pub fn state(&self, id: usize) -> Option<&State> {
223    self.states.get(&id)
224  }
225
226  /// Returns a mutable reference to the given state.
227  ///
228  /// Returns [`None`] if the given state does not exist.
229  pub fn state_mut(&mut self, id: usize) -> Option<&mut State> {
230    self.states.get_mut(&id)
231  }
232
233  /// Returns a reference to the initial state.
234  pub fn init(&self) -> &State {
235    self.states.get(&self.init).unwrap()
236  }
237
238  /// Returns a mutable reference to the given initial state.
239  pub fn init_mut(&mut self) -> &mut State {
240    self.states.get_mut(&self.init).unwrap()
241  }
242
243  /// Returns the ID of the initial state.
244  pub fn init_id(&self) -> usize {
245    self.init
246  }
247
248  /// Returns a reference to the ID set of the final states.
249  pub fn finals(&self) -> &HashSet<usize> {
250    &self.finals
251  }
252
253  /// Returns the ID of the final state.
254  ///
255  /// Returns [`None`] if there is no final state or more than one final state.
256  pub fn final_id(&self) -> Option<usize> {
257    if self.finals().len() > 1 {
258      None
259    } else {
260      self.finals().iter().next().copied()
261    }
262  }
263
264  /// Dumps the current finite automaton to the given writer as Graphviz.
265  pub fn dump<W>(&self, writer: &mut W) -> io::Result<()>
266  where
267    Sym: fmt::Debug,
268    W: io::Write,
269  {
270    writeln!(writer, "digraph finite_automaton {{")?;
271    writeln!(writer, "  rankdir = LR")?;
272    writeln!(writer, "  node [shape = doublecircle];")?;
273    write!(writer, " ")?;
274    for id in &self.finals {
275      write!(writer, " {id}")?;
276    }
277    writeln!(writer, ";")?;
278    writeln!(writer, "  node [shape = circle];")?;
279    for (id, state) in &self.states {
280      state.dump(writer, *id)?;
281    }
282    writeln!(writer, "}}")?;
283    Ok(())
284  }
285}
286
287impl<Sym, State: self::State<Sym>> Default for FiniteAutomaton<Sym, State> {
288  fn default() -> Self {
289    Self::new()
290  }
291}
292
293/// Finite automaton which state type is [`DenseState`].
294pub type DenseFA<S> = FiniteAutomaton<S, DenseState<S>>;
295
296/// Finite automaton which state type is [`MultiState`].
297pub type MultiFA<S> = FiniteAutomaton<S, MultiState<S>>;
298
299/// Builder for calculating closures from a finite automation.
300pub struct ClosureBuilder<S> {
301  empty_edges: HashMap<usize, HashSet<usize>>,
302  normal_edges: HashMap<usize, MultiState<S>>,
303}
304
305impl<S> From<MultiFA<Option<S>>> for ClosureBuilder<S>
306where
307  S: Eq + Hash,
308{
309  fn from(fa: MultiFA<Option<S>>) -> Self {
310    let mut empty_edges = HashMap::new();
311    let mut normal_edges: HashMap<_, MultiState<S>> = HashMap::new();
312    for (id, s) in fa.states {
313      for (s, to) in s.outs {
314        match s {
315          Some(s) => normal_edges
316            .entry(id)
317            .or_insert_with(|| State::new())
318            .outs
319            .insert(s, to),
320          None => empty_edges.insert(id, to),
321        };
322      }
323    }
324    Self {
325      empty_edges,
326      normal_edges,
327    }
328  }
329}
330
331impl<S> ClosureBuilder<S> {
332  /// Returns the symbol set of the current finite automaton.
333  pub fn symbol_set(&self) -> HashSet<S>
334  where
335    S: Clone + Eq + Hash,
336  {
337    self
338      .normal_edges
339      .values()
340      .flat_map(|s| s.outs().keys().cloned())
341      .collect()
342  }
343
344  /// Returns the epsilon closure of the given state.
345  pub fn epsilon_closure<Ids>(&self, cached: &mut CachedClosures, ids: Ids) -> Closure
346  where
347    Ids: Into<Closure>,
348  {
349    let mut closure = ids.into();
350    if closure.is_empty() {
351      closure
352    } else if let Some(c) = cached.get(&closure) {
353      c.clone()
354    } else {
355      let ids = closure.clone();
356      let mut next_ids: Vec<_> = closure.iter().copied().collect();
357      while let Some(id) = next_ids.pop() {
358        if let Some(to_ids) = self.empty_edges.get(&id) {
359          for id in to_ids {
360            if closure.insert(*id) {
361              next_ids.push(*id);
362            }
363          }
364        }
365      }
366      cached.insert(ids, closure.clone());
367      closure
368    }
369  }
370
371  /// Returns a set of all possible states that can be reached
372  /// after accepting symbol `s` on the given states.
373  pub fn state_closure(&self, cached: &mut CachedClosures, states: &Closure, s: &S) -> Closure
374  where
375    S: Eq + Hash,
376  {
377    let mut next_states = Closure::new();
378    for id in states {
379      if let Some(ids) = self.normal_edges.get(id).and_then(|st| st.outs().get(s)) {
380        next_states.extend(ids);
381      }
382    }
383    self.epsilon_closure(cached, next_states)
384  }
385}
386
387/// Closure of a state of finite automaton.
388pub type Closure = BTreeSet<usize>;
389
390/// Cached closures.
391pub type CachedClosures = HashMap<Closure, Closure>;