1use regex::Regex;
34use std::ops::Deref;
35
36pub struct Automaton {
38 pub state_initial: i32,
39 pub state_final: i32,
40 finders: Vec<Finder>,
41}
42
/// A lexed fragment: a caller-defined type identifier plus the text it covers.
pub struct Token {
    /// Caller-defined identifier of the token kind.
    pub type_id: i32,
    /// Text carried by the token (may be empty).
    pub text: String,
}
48
49pub struct Runner<'a> {
51 pub source: std::string::String,
52 automaton: &'a Automaton,
53 pub state: i32,
54 pub tokens: Vec<Token>,
55}
56
57pub struct Finder {
59 pub state_from: i32,
60 pub state_to: i32,
61 callback: fn(runner: &mut Runner, finder: &Finder) -> bool,
62 regex: Option<Regex>,
63 automaton: Option<Automaton>,
64 pub token_type: i32,
65 pub join_tokens: bool,
66}
67
68impl<'a> Automaton {
69 pub fn new(state_initial: i32, state_final: i32) -> Automaton {
70 Automaton {
71 state_initial: state_initial,
72 state_final: state_final,
73 finders: vec![],
74 }
75 }
76
77 pub fn run(&'a self, source: std::string::String) -> Runner<'a> {
78 let mut runner = Runner {
79 source: source,
80 automaton: self,
81 state: self.state_initial,
82 tokens: vec![],
83 };
84
85 runner.run();
86
87 runner
88 }
89
90 pub fn run_loop(&'a self, source: std::string::String) -> Runner<'a> {
91 let mut runner = Runner {
92 source: source,
93 automaton: self,
94 state: self.state_initial,
95 tokens: vec![],
96 };
97
98 runner.run_loop();
99
100 runner
101 }
102
103 pub fn find_custom(
104 &mut self,
105 token_type: i32,
106 state_from: i32,
107 state_to: i32,
108 callback: fn(runner: &mut Runner, finder: &Finder) -> bool,
109 ) {
110 self.finders.push(Finder {
111 state_from: state_from,
112 state_to: state_to,
113 callback: callback,
114 regex: None,
115 automaton: None,
116 token_type: token_type,
117 join_tokens: false,
118 })
119 }
120
121 fn finder_whitespace(runner: &mut Runner, finder: &Finder) -> bool {
122 let ws = &[' ', '\t'];
123 if runner.source.len() > 0 && ws.contains(&(runner.source.as_bytes()[0] as char)) {
124 let mut num_spaces = 1;
125 for i in 1..runner.source.len() {
126 if ws.contains(&(runner.source.as_bytes()[i] as char)) {
127 num_spaces += 1;
128 } else {
129 break;
130 }
131 }
132 if num_spaces > 0 {
133 let text = runner.source.deref()[..num_spaces].to_string();
134 runner.add_token(Token::new(finder.token_type, text));
135 return true;
136 }
137 }
138 return false;
139 }
140
141 pub fn find_whitespace(&mut self, token_type: i32, state_from: i32, state_to: i32) {
142 self.find_custom(
143 token_type,
144 state_from,
145 state_to,
146 Automaton::finder_whitespace,
147 );
148 }
149
150 fn finder_end(runner: &mut Runner, finder: &Finder) -> bool {
151 if runner.source.len() == 0 {
152 runner.add_token(Token::new(finder.token_type, "".to_string()));
153 true
154 } else {
155 false
156 }
157 }
158
159 pub fn find_end(&mut self, token_type: i32, state_from: i32, state_to: i32) {
160 self.find_custom(token_type, state_from, state_to, Automaton::finder_end);
161 }
162
163 fn finder_regex(runner: &mut Runner, finder: &Finder) -> bool {
164 match finder
165 .regex
166 .clone()
167 .unwrap()
168 .find(runner.source.clone().deref())
169 {
170 Some(regex_match) => {
171 if regex_match.start() == 0 {
172 let text = runner.source.clone().deref()[..regex_match.end()].to_string();
173 runner.add_token(Token::new(finder.token_type, text));
174 true
175 } else {
176 false
177 }
178 }
179 None => false,
180 }
181 }
182
183 pub fn find_regex(&mut self, token_type: i32, state_from: i32, state_to: i32, re: Regex) {
184 self.finders.push(Finder {
185 state_from: state_from,
186 state_to: state_to,
187 callback: Automaton::finder_regex,
188 regex: Some(re),
189 automaton: None,
190 token_type: token_type,
191 join_tokens: false,
192 })
193 }
194
195 fn automaton_run(runner: &mut Runner, finder: &Finder, am: &Automaton) -> bool {
196 let sub_runner = am.run(runner.source.clone());
197 if sub_runner.state == am.state_final {
198 if finder.join_tokens {
199 let mut full_text = std::string::String::new();
200 for part in sub_runner.tokens.iter() {
201 full_text.push_str(part.text.deref());
202 }
203 runner.tokens.push(Token {
204 type_id: finder.token_type,
205 text: full_text,
206 });
207 } else {
208 for t in sub_runner.tokens.deref().iter() {
209 runner.tokens.push(t.clone());
210 }
211 }
212 runner.source = sub_runner.source.clone();
213 true
214 } else {
215 false
216 }
217 }
218
219 fn finder_automaton(runner: &mut Runner, finder: &Finder) -> bool {
220 match finder.automaton {
221 Some(ref am) => Automaton::automaton_run(runner, finder, am),
222 None => panic!(),
223 }
224 }
225
226 pub fn find_automaton(
227 &'a mut self,
228 state_from: i32,
229 state_to: i32,
230 am: Automaton,
231 ) -> &'a mut Finder {
232 self.finders.push(Finder {
233 state_from: state_from,
234 state_to: state_to,
235 callback: Automaton::finder_automaton,
236 regex: None,
237 automaton: Some(am),
238 token_type: -1,
239 join_tokens: false,
240 });
241 self.finders.last_mut().unwrap()
242 }
243
244 fn finder_me(runner: &mut Runner, finder: &Finder) -> bool {
245 Automaton::automaton_run(runner, finder, runner.automaton)
246 }
247
248 pub fn find_me(&'a mut self, state_from: i32, state_to: i32) -> &'a mut Finder {
249 self.finders.push(Finder {
250 state_from: state_from,
251 state_to: state_to,
252 callback: Automaton::finder_me,
253 regex: None,
254 automaton: None,
255 token_type: -1,
256 join_tokens: false,
257 });
258 self.finders.last_mut().unwrap()
259 }
260}
261
262impl std::fmt::Debug for Automaton {
263 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
264 write!(f, "([{} --> {}])", self.state_initial, self.state_final)
265 }
266}
267
268impl std::clone::Clone for Automaton {
269 fn clone(&self) -> Automaton {
270 Automaton {
271 state_initial: self.state_initial,
272 state_final: self.state_final,
273 finders: self.finders.clone(),
274 }
275 }
276}
277
278impl Token {
279 pub fn new(type_id: i32, text: std::string::String) -> Token {
280 Token {
281 type_id: type_id,
282 text: text,
283 }
284 }
285}
286
287impl std::fmt::Debug for Token {
288 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
289 write!(f, "([{}] \"{}\")", self.type_id, self.text)
290 }
291}
292
293impl std::clone::Clone for Token {
294 fn clone(&self) -> Token {
295 Token::new(self.type_id, self.text.clone())
296 }
297}
298
299impl<'a> Runner<'a> {
300 fn run(&mut self) {
301 for finder in self.automaton.finders.iter() {
302 let func = finder.callback;
303 if self.state == finder.state_from && func(self, finder) == true {
304 self.state = finder.state_to;
305 }
306 }
307 }
308
309 fn run_loop(&mut self) {
310 let mut has_reached_end = false;
311 loop {
312 self.run();
313 if self.completed() == false || has_reached_end {
314 break;
315 }
316 self.state = self.automaton.state_initial;
317 has_reached_end = self.source.len() == 0;
320 }
321 }
322
323 pub fn add_token(&mut self, token: Token) {
324 let len = token.text.len();
325 self.tokens.push(token);
326 self.source = self.source.deref()[len..].to_string();
327 }
328
329 pub fn completed(&self) -> bool {
330 self.state == self.automaton.state_final
331 }
332}
333
334impl<'a> std::fmt::Debug for Runner<'a> {
335 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
336 write!(
337 f,
338 "(runner [automaton: {:?}, current_state: {}])",
339 self.automaton, self.state
340 )
341 }
342}
343
344impl Finder {
345 pub fn join_tokens(&mut self, token_type: i32) {
346 self.join_tokens = true;
347 self.token_type = token_type;
348 }
349}
350
351impl std::clone::Clone for Finder {
352 fn clone(&self) -> Finder {
353 Finder {
354 state_from: self.state_from,
355 state_to: self.state_to,
356 callback: self.callback,
357 regex: self.regex.clone(),
358 automaton: self.automaton.clone(),
359 token_type: self.token_type,
360 join_tokens: self.join_tokens,
361 }
362 }
363}