1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
//! Nondeterministic finite automaton ([`NFA`]) related implementations.
//!
//! An NFA can be built from a mid-level intermediate represention ([`Mir`]).

use crate::fa::{MultiFA, State};
use crate::mir::Mir;
use std::collections::HashMap;
use std::hash::Hash;
use std::{fmt, io};

/// A nondeterministic finite automaton (NFA)
/// with symbol type `S` and tag type `T`.
#[derive(Debug)]
pub struct NFA<S, T>
where
  S: Eq + Hash,
{
  fa: MultiFA<Option<(S, S)>>,
  tags: HashMap<usize, T>,
}

impl<S, T> NFA<S, T>
where
  S: Eq + Hash,
{
  /// Creates a new NFA from [`Mir`].
  pub fn new(mir: Mir<S, T>) -> Self {
    match mir {
      Mir::Empty => Self::new_nfa_with_symbol(None),
      Mir::Range(l, r) => Self::new_nfa_with_symbol(Some((l, r))),
      Mir::Concat(c) => c.into_iter().map(Self::new).reduce(Self::concat).unwrap(),
      Mir::Alter(mut a) => {
        if a.len() == 1 {
          let (mir, tag) = a.swap_remove(0);
          let mut nfa = Self::new(mir);
          if let Some(tag) = tag {
            let fs = nfa.normalize();
            nfa.fa.set_final_state(fs);
            nfa.tags.insert(fs, tag);
          }
          nfa
        } else {
          a.into_iter()
            .map(|(mir, tag)| (Self::new(mir), tag))
            .reduce(Self::alter)
            .unwrap()
            .0
        }
      }
      Mir::Kleene(k) => {
        // create NFA and normalize
        let mut nfa = Self::new(*k);
        let id = nfa.normalize();
        // create a edge to the initial state
        let init = nfa.fa.init_id();
        nfa.fa.state_mut(id).unwrap().add(None, init);
        // set the initial state as a final state
        nfa.fa.set_final_state(init);
        nfa
      }
    }
  }

  /// Creates a new NFA which matches the given symbol.
  fn new_nfa_with_symbol(sym: Option<(S, S)>) -> Self {
    let mut fa = MultiFA::new();
    let fs = fa.add_final_state();
    fa.init_mut().add(sym, fs);
    Self {
      fa,
      tags: HashMap::new(),
    }
  }

  /// Creates an alternation of the given two NFA-tag pairs.
  fn alter(
    (mut nfa1, tag1): (Self, Option<T>),
    (mut nfa2, tag2): (Self, Option<T>),
  ) -> (Self, Option<T>) {
    // create final state and tag mapping for `nfa1`
    let fs1 = nfa1.normalize();
    nfa1.fa.set_final_state(fs1);
    if let Some(tag1) = tag1 {
      nfa1.tags.insert(fs1, tag1);
    }
    // add empty edge to the initial state of `nfa2`
    nfa1.fa.init_mut().add(None, nfa2.fa.init_id());
    // create final state and tag mapping for `nfa2` if it has a tag
    if let Some(tag2) = tag2 {
      let fs2 = nfa2.normalize();
      nfa2.fa.set_final_state(fs2);
      nfa1.tags.insert(fs2, tag2);
    }
    // union states and tags of two NFAs
    nfa1.fa.union(nfa2.fa);
    nfa1.tags.extend(nfa2.tags);
    (nfa1, None)
  }

  /// Concatenates the given two NFAs into a new NFA.
  fn concat(mut nfa1: Self, nfa2: Self) -> Self {
    let fs1 = nfa1.normalize();
    nfa1.fa.state_mut(fs1).unwrap().add(None, nfa2.fa.init_id());
    nfa1.fa.union(nfa2.fa);
    nfa1.tags.extend(nfa2.tags);
    nfa1
  }

  /// Normalizes the current NFA.
  ///
  /// Keeps only final states with tags, set all other final states as
  /// normal states, and route them to a new normal state with an empty edge.
  ///
  /// Returns the normal state ID.
  fn normalize(&mut self) -> usize {
    // try to get an untagged final state
    let untagged = self
      .fa
      .finals()
      .iter()
      .copied()
      .find(|id| !self.tags.contains_key(id));
    // get the target state id
    let target = if let Some(untagged) = untagged {
      self.fa.set_normal_state(untagged);
      untagged
    } else {
      self.fa.add_state()
    };
    // add edges to the target state
    for id in self.fa.finals().clone() {
      if id != target {
        self.fa.state_mut(id).unwrap().add(None, target);
        if !self.tags.contains_key(&id) {
          self.fa.set_normal_state(id);
        }
      }
    }
    target
  }

  /// Converts the current NFA into a
  /// [`FiniteAutomaton`](crate::fa::FiniteAutomaton) and a tag set.
  pub fn into_fa_tags(self) -> FATags<S, T> {
    (self.fa, self.tags)
  }

  /// Dumps the current finite automaton to the given writer as Graphviz.
  pub fn dump<W>(&self, writer: &mut W) -> io::Result<()>
  where
    S: fmt::Debug,
    W: io::Write,
  {
    self.fa.dump(writer)
  }
}

impl<S, T> From<Mir<S, T>> for NFA<S, T>
where
  S: Eq + Hash,
{
  fn from(mir: Mir<S, T>) -> Self {
    Self::new(mir)
  }
}

/// A pair of [`NFA`]'s internal finite automaton and the tag map.
///
/// Used by method [`into_fa_tags`](NFA#method.into_fa_tags) of [`NFA`].
pub type FATags<S, T> = (MultiFA<Option<(S, S)>>, HashMap<usize, T>);