1use std::collections::{BTreeSet, HashMap, HashSet};
8use std::hash::Hash;
9use std::marker::PhantomData;
10use std::sync::{Mutex, MutexGuard, OnceLock};
11use std::{fmt, io};
12
13static NEXT_STATE_ID: OnceLock<Mutex<usize>> = OnceLock::new();
15
16fn next_state_id() -> MutexGuard<'static, usize> {
18 NEXT_STATE_ID
19 .get_or_init(|| Mutex::new(0))
20 .lock()
21 .expect("failed to acquire the next state ID")
22}
23
24fn get_and_update_state_id() -> usize {
26 let mut id = next_state_id();
27 let cur = *id;
28 *id += 1;
29 cur
30}
31
32pub trait State<S> {
34 fn new() -> Self;
36
37 fn add(&mut self, sym: S, state: usize);
39
40 fn dump<W>(&self, writer: &mut W, id: usize) -> io::Result<()>
42 where
43 S: fmt::Debug,
44 W: io::Write;
45}
46
47#[derive(Debug)]
51pub struct DenseState<S> {
52 outs: Vec<(S, usize)>,
53}
54
55impl<S> DenseState<S> {
56 pub fn outs(&self) -> &[(S, usize)] {
58 &self.outs
59 }
60
61 pub fn next_state(&self, sym: &S) -> Option<usize>
66 where
67 S: PartialEq,
68 {
69 self
70 .outs
71 .iter()
72 .find_map(|(s, id)| (s == sym).then_some(*id))
73 }
74}
75
76impl<S> State<S> for DenseState<S> {
77 fn new() -> Self {
78 Self { outs: Vec::new() }
79 }
80
81 fn add(&mut self, sym: S, state: usize) {
82 self.outs.push((sym, state));
83 }
84
85 fn dump<W>(&self, writer: &mut W, id: usize) -> io::Result<()>
86 where
87 S: fmt::Debug,
88 W: io::Write,
89 {
90 for (s, to) in &self.outs {
91 writeln!(writer, " {id} -> {to} [label = \"{s:?}\"]")?;
92 }
93 Ok(())
94 }
95}
96
97#[derive(Debug)]
102pub struct MultiState<S> {
103 outs: HashMap<S, HashSet<usize>>,
104}
105
106impl<S> MultiState<S> {
107 pub fn outs(&self) -> &HashMap<S, HashSet<usize>> {
109 &self.outs
110 }
111}
112
113impl<S> State<S> for MultiState<S>
114where
115 S: Eq + Hash,
116{
117 fn new() -> Self {
118 Self {
119 outs: HashMap::new(),
120 }
121 }
122
123 fn add(&mut self, sym: S, state: usize) {
124 self.outs.entry(sym).or_default().insert(state);
125 }
126
127 fn dump<W>(&self, writer: &mut W, id: usize) -> io::Result<()>
128 where
129 S: fmt::Debug,
130 W: io::Write,
131 {
132 for (s, to_ids) in &self.outs {
133 for to in to_ids {
134 writeln!(writer, " {id} -> {to} [label = \"{s:?}\"]")?;
135 }
136 }
137 Ok(())
138 }
139}
140
141#[derive(Debug)]
143pub struct FiniteAutomaton<Sym, State: self::State<Sym>> {
144 states: HashMap<usize, State>,
145 init: usize,
146 finals: HashSet<usize>,
147 sym: PhantomData<Sym>,
148}
149
150impl<Sym, State: self::State<Sym>> FiniteAutomaton<Sym, State> {
151 pub fn new() -> Self {
153 let init = get_and_update_state_id();
154 Self {
155 states: [(init, State::new())].into(),
156 init,
157 finals: HashSet::new(),
158 sym: PhantomData,
159 }
160 }
161
162 pub fn add_state(&mut self) -> usize {
166 let id = get_and_update_state_id();
167 self.states.insert(id, State::new());
168 id
169 }
170
171 pub fn add_final_state(&mut self) -> usize {
175 let id = self.add_state();
176 self.finals.insert(id);
177 id
178 }
179
180 pub fn set_final_state(&mut self, id: usize) -> bool {
184 if self.states.contains_key(&id) {
185 self.finals.insert(id);
186 true
187 } else {
188 false
189 }
190 }
191
192 pub fn set_normal_state(&mut self, id: usize) -> bool {
196 if self.states.contains_key(&id) {
197 self.finals.remove(&id);
198 true
199 } else {
200 false
201 }
202 }
203
204 pub fn union(&mut self, fa: Self) {
210 self.states.extend(fa.states);
211 self.finals.extend(fa.finals);
212 }
213
214 pub fn states(&self) -> &HashMap<usize, State> {
216 &self.states
217 }
218
219 pub fn state(&self, id: usize) -> Option<&State> {
223 self.states.get(&id)
224 }
225
226 pub fn state_mut(&mut self, id: usize) -> Option<&mut State> {
230 self.states.get_mut(&id)
231 }
232
233 pub fn init(&self) -> &State {
235 self.states.get(&self.init).unwrap()
236 }
237
238 pub fn init_mut(&mut self) -> &mut State {
240 self.states.get_mut(&self.init).unwrap()
241 }
242
243 pub fn init_id(&self) -> usize {
245 self.init
246 }
247
248 pub fn finals(&self) -> &HashSet<usize> {
250 &self.finals
251 }
252
253 pub fn final_id(&self) -> Option<usize> {
257 if self.finals().len() > 1 {
258 None
259 } else {
260 self.finals().iter().next().copied()
261 }
262 }
263
264 pub fn dump<W>(&self, writer: &mut W) -> io::Result<()>
266 where
267 Sym: fmt::Debug,
268 W: io::Write,
269 {
270 writeln!(writer, "digraph finite_automaton {{")?;
271 writeln!(writer, " rankdir = LR")?;
272 writeln!(writer, " node [shape = doublecircle];")?;
273 write!(writer, " ")?;
274 for id in &self.finals {
275 write!(writer, " {id}")?;
276 }
277 writeln!(writer, ";")?;
278 writeln!(writer, " node [shape = circle];")?;
279 for (id, state) in &self.states {
280 state.dump(writer, *id)?;
281 }
282 writeln!(writer, "}}")?;
283 Ok(())
284 }
285}
286
287impl<Sym, State: self::State<Sym>> Default for FiniteAutomaton<Sym, State> {
288 fn default() -> Self {
289 Self::new()
290 }
291}
292
293pub type DenseFA<S> = FiniteAutomaton<S, DenseState<S>>;
295
296pub type MultiFA<S> = FiniteAutomaton<S, MultiState<S>>;
298
299pub struct ClosureBuilder<S> {
301 empty_edges: HashMap<usize, HashSet<usize>>,
302 normal_edges: HashMap<usize, MultiState<S>>,
303}
304
305impl<S> From<MultiFA<Option<S>>> for ClosureBuilder<S>
306where
307 S: Eq + Hash,
308{
309 fn from(fa: MultiFA<Option<S>>) -> Self {
310 let mut empty_edges = HashMap::new();
311 let mut normal_edges: HashMap<_, MultiState<S>> = HashMap::new();
312 for (id, s) in fa.states {
313 for (s, to) in s.outs {
314 match s {
315 Some(s) => normal_edges
316 .entry(id)
317 .or_insert_with(|| State::new())
318 .outs
319 .insert(s, to),
320 None => empty_edges.insert(id, to),
321 };
322 }
323 }
324 Self {
325 empty_edges,
326 normal_edges,
327 }
328 }
329}
330
331impl<S> ClosureBuilder<S> {
332 pub fn symbol_set(&self) -> HashSet<S>
334 where
335 S: Clone + Eq + Hash,
336 {
337 self
338 .normal_edges
339 .values()
340 .flat_map(|s| s.outs().keys().cloned())
341 .collect()
342 }
343
344 pub fn epsilon_closure<Ids>(&self, cached: &mut CachedClosures, ids: Ids) -> Closure
346 where
347 Ids: Into<Closure>,
348 {
349 let mut closure = ids.into();
350 if closure.is_empty() {
351 closure
352 } else if let Some(c) = cached.get(&closure) {
353 c.clone()
354 } else {
355 let ids = closure.clone();
356 let mut next_ids: Vec<_> = closure.iter().copied().collect();
357 while let Some(id) = next_ids.pop() {
358 if let Some(to_ids) = self.empty_edges.get(&id) {
359 for id in to_ids {
360 if closure.insert(*id) {
361 next_ids.push(*id);
362 }
363 }
364 }
365 }
366 cached.insert(ids, closure.clone());
367 closure
368 }
369 }
370
371 pub fn state_closure(&self, cached: &mut CachedClosures, states: &Closure, s: &S) -> Closure
374 where
375 S: Eq + Hash,
376 {
377 let mut next_states = Closure::new();
378 for id in states {
379 if let Some(ids) = self.normal_edges.get(id).and_then(|st| st.outs().get(s)) {
380 next_states.extend(ids);
381 }
382 }
383 self.epsilon_closure(cached, next_states)
384 }
385}
386
387pub type Closure = BTreeSet<usize>;
389
390pub type CachedClosures = HashMap<Closure, Closure>;