1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
use super::{StateIdx, DFA};
pub use crate::nfa::simulate::{ErrorLoc, Matches};
use crate::nfa::AcceptingState;
use crate::range_map::Range;
use crate::right_ctx::RightCtxDFAs;
impl<A: Copy> DFA<StateIdx, A> {
pub fn simulate<'input>(
&self,
input: &'input str,
right_ctx_dfas: &RightCtxDFAs<StateIdx>,
) -> (Matches<'input, A>, Option<ErrorLoc>) {
let mut values: Matches<'input, A> = vec![];
// Current state
let mut state = StateIdx(0);
// See comments for the same variable in NFA simulation
let mut last_match: Option<(usize, A, usize)> = None;
let mut char_indices = input.char_indices();
// Where the current match starts
let mut match_start = 0;
// Index of current character in input string
let mut char_idx: usize;
'outer: loop {
while let Some((char_idx_, char)) = char_indices.next() {
char_idx = match_start + char_idx_;
match next(self, state, char) {
None => {
match last_match.take() {
None => {
// We're stuck and can't backtrack, raise an error
return (values, Some(match_start));
}
Some((last_match_start, last_match_value, last_match_end)) => {
// Backtrack to the previous accepting state
match_start = last_match_end;
char_indices = input[match_start..].char_indices();
// Accept the previous match
values.push((
&input[last_match_start..last_match_end],
last_match_value,
));
// Restart state machine
state = StateIdx(0);
}
}
}
Some(next_state) => {
state = next_state;
// Check for accepting state
for AcceptingState { value, right_ctx } in &self.states[state.0].accepting {
match right_ctx {
None => {
last_match =
Some((match_start, *value, char_idx + char.len_utf8()));
break;
}
Some(right_ctx_idx) => {
let right_ctx_dfa = right_ctx_dfas.get(right_ctx_idx);
if simulate_right_ctx(right_ctx_dfa, char_indices.clone()) {
last_match =
Some((match_start, *value, char_idx + char.len_utf8()));
break;
}
}
}
}
}
}
}
// Reached EOF, take EOF transition, check for accepting states
if let Some(next) = next_end_of_input(self, state) {
// Check for accepting state
state = next;
for AcceptingState { value, right_ctx } in &self.states[state.0].accepting {
match right_ctx {
None => {
values.push((&input[match_start..], *value));
break 'outer;
}
Some(right_ctx_idx) => {
let right_ctx_dfa = right_ctx_dfas.get(right_ctx_idx);
if simulate_right_ctx(right_ctx_dfa, char_indices.clone()) {
values.push((&input[match_start..], *value));
break 'outer;
}
}
}
}
}
// Reached EOF but cannot accept input, backtrack if possible, otherwise raise an error
match last_match.take() {
Some((last_match_start, last_match_value, last_match_end)) => {
values.push((&input[last_match_start..last_match_end], last_match_value));
if last_match_end == input.len() {
break 'outer;
} else {
// Backtrack
match_start = last_match_end;
char_indices = input[match_start..].char_indices();
// Restart state machine
state = StateIdx(0);
}
}
None => {
// We're stuck and can't backtrack, raise an error
return (values, Some(match_start));
}
}
}
(values, None)
}
}
fn next<A>(dfa: &DFA<StateIdx, A>, state: StateIdx, char: char) -> Option<StateIdx> {
let state = &dfa.states[state.0];
if let Some(next) = state.char_transitions.get(&char) {
return Some(*next);
}
for range in state.range_transitions.iter() {
let Range { start, end, value } = range;
if char as u32 >= *start && char as u32 <= *end {
return Some(*value);
}
}
if let Some(next) = state.any_transition {
return Some(next);
}
None
}
fn next_end_of_input<A>(dfa: &DFA<StateIdx, A>, state: StateIdx) -> Option<StateIdx> {
dfa.states[state.0].end_of_input_transition
}
// Similar to `simulate`, but does not keep track of the last match as we don't need "longest
// match" semantics and backtracking
pub fn simulate_right_ctx(dfa: &DFA<StateIdx, ()>, char_indices: std::str::CharIndices) -> bool {
let mut state = dfa.initial_state();
if dfa.is_accepting_state(state) {
return true;
}
for (_, char) in char_indices {
match next(dfa, state, char) {
None => {
// Stuck
return false;
}
Some(next_state) => {
if dfa.is_accepting_state(next_state) {
return true;
}
state = next_state;
}
}
}
match next_end_of_input(dfa, state) {
None => false,
Some(next_state) => dfa.is_accepting_state(next_state),
}
}