#![allow(dead_code)]
#![allow(unused_mut)]
#![allow(unreachable_code)]

extern crate regex;

#[macro_use]
extern crate lazy_static;

use regex::Regex;
use std::collections::HashMap;

// Stack value: either a shifted token or a computed semantic value.
enum SV {
    Undefined,
    _0(Token),
    _1(i32)
}

// Lexical rules: whitespace, number literals, and the operator/parenthesis tokens.
static LEX_RULES: [&'static str; 6] = [
    r"^\s+",
    r"^\d+",
    r"^\+",
    r"^\*",
    r"^\(",
    r"^\)"
];

// Marker token returned at the end of input.
static EOF: &'static str = "$";

// Builds a `HashMap` from a list of `key => value` pairs.
macro_rules! hashmap(
    { $($key:expr => $value:expr),+ } => {
        {
            let mut m = ::std::collections::HashMap::new();
            $(
                m.insert($key, $value);
            )+
            m
        }
    };
);

// Extracts the inner value from an `SV` variant, panicking on a mismatch.
macro_rules! get_result {
    ($r:expr, $ty:ident) => (match $r { SV::$ty(v) => v, _ => unreachable!() });
}

// Pops the top of a value stack and unwraps it as the given `SV` variant.
macro_rules! pop {
    ($s:expr, $ty:ident) => (get_result!($s.pop().unwrap(), $ty));
}
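
// A minimal sketch (hypothetical, not part of the generated parser) of how
// the macros above combine: `pop!(stack, _1)` pops the top `SV` and yields
// the inner `i32`, hitting `unreachable!()` if the variant does not match.
fn _macro_demo() {
    let mut stack = vec![SV::_1(42)];
    let n = pop!(stack, _1);
    assert_eq!(n, 42);
}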

// Productions table: [lhs nonterminal id, rhs length] per production.
// Judging from the semantic handlers below: 0 is the augmented start
// production, 1 is addition, 2 is multiplication, 3 is a number literal,
// and 4 is a parenthesized expression.
static PRODUCTIONS: [[i32; 2]; 5] = [
    [-1, 1],
    [0, 3],
    [0, 3],
    [0, 1],
    [0, 3]
];

// Parse-table entries.
enum TE {
    // Accept the parsed input.
    Accept,

    // Shift the token and transit to the given state.
    Shift(usize),

    // Reduce by the given production number.
    Reduce(usize),

    // Goto entry: transit to the given state after a reduce.
    Transit(usize),
}

lazy_static! {
    // Lexical rules grouped by tokenizer start condition.
    static ref LEX_RULES_BY_START_CONDITIONS: HashMap<&'static str, Vec<i32>> = hashmap! { "INITIAL" => vec![0, 1, 2, 3, 4, 5] };

    // Maps token string names to their encoded numeric kinds.
    static ref TOKENS_MAP: HashMap<&'static str, i32> = hashmap! { "+" => 1, "*" => 2, "NUMBER" => 3, "(" => 4, ")" => 5, "$" => 6 };

    // LR parsing table: one map per state, keyed by encoded token/nonterminal.
    static ref TABLE: Vec<HashMap<i32, TE>> = vec![
        hashmap! { 0 => TE::Transit(1), 3 => TE::Shift(2), 4 => TE::Shift(3) },
        hashmap! { 1 => TE::Shift(4), 2 => TE::Shift(5), 6 => TE::Accept },
        hashmap! { 1 => TE::Reduce(3), 2 => TE::Reduce(3), 5 => TE::Reduce(3), 6 => TE::Reduce(3) },
        hashmap! { 0 => TE::Transit(8), 3 => TE::Shift(2), 4 => TE::Shift(3) },
        hashmap! { 0 => TE::Transit(6), 3 => TE::Shift(2), 4 => TE::Shift(3) },
        hashmap! { 0 => TE::Transit(7), 3 => TE::Shift(2), 4 => TE::Shift(3) },
        hashmap! { 1 => TE::Reduce(1), 2 => TE::Shift(5), 5 => TE::Reduce(1), 6 => TE::Reduce(1) },
        hashmap! { 1 => TE::Reduce(2), 2 => TE::Reduce(2), 5 => TE::Reduce(2), 6 => TE::Reduce(2) },
        hashmap! { 1 => TE::Shift(4), 2 => TE::Shift(5), 5 => TE::Shift(9) },
        hashmap! { 1 => TE::Reduce(4), 2 => TE::Reduce(4), 5 => TE::Reduce(4), 6 => TE::Reduce(4) }
    ];
}

// Final result type returned by `Parser::parse`.
type TResult = i32;

#[derive(Debug, Clone, Copy)]
struct Token {
    kind: i32,
    value: &'static str,

    start_offset: i32,
    end_offset: i32,
    start_line: i32,
    end_line: i32,
    start_column: i32,
    end_column: i32,
}

lazy_static! {
    // Lexical rules compiled to regexes once at startup.
    static ref REGEX_RULES: Vec<Regex> = LEX_RULES.iter().map(|rule| Regex::new(rule).unwrap()).collect();
}

struct Tokenizer {
    // String being tokenized.
    string: &'static str,

    // Cursor tracking the current position.
    cursor: i32,

    // Stack of lexer start-condition states.
    states: Vec<&'static str>,

    // Line-based location tracking.
    current_line: i32,
    current_column: i32,
    current_line_begin_offset: i32,

    // Location of the currently matched token.
    token_start_offset: i32,
    token_end_offset: i32,
    token_start_line: i32,
    token_end_line: i32,
    token_start_column: i32,
    token_end_column: i32,

    // Matched text and its length.
    yytext: &'static str,
    yyleng: usize,

    // Handlers executed when a lexical rule matches.
    handlers: [fn(&mut Tokenizer) -> &'static str; 6],
}

impl Tokenizer {

    pub fn new() -> Tokenizer {
        let mut tokenizer = Tokenizer {
            string: "",
            cursor: 0,

            states: Vec::new(),

            current_line: 1,
            current_column: 0,
            current_line_begin_offset: 0,

            token_start_offset: 0,
            token_end_offset: 0,
            token_start_line: 0,
            token_end_line: 0,
            token_start_column: 0,
            token_end_column: 0,

            yytext: "",
            yyleng: 0,

            handlers: [
                Tokenizer::_lex_rule0,
                Tokenizer::_lex_rule1,
                Tokenizer::_lex_rule2,
                Tokenizer::_lex_rule3,
                Tokenizer::_lex_rule4,
                Tokenizer::_lex_rule5
            ],
        };

        tokenizer
    }

    pub fn init_string(&mut self, string: &'static str) -> &mut Tokenizer {
        self.string = string;

        self.states.clear();
        self.states.push("INITIAL");

        self.cursor = 0;
        self.current_line = 1;
        self.current_column = 0;
        self.current_line_begin_offset = 0;

        self.token_start_offset = 0;
        self.token_end_offset = 0;
        self.token_start_line = 0;
        self.token_end_line = 0;
        self.token_start_column = 0;
        self.token_end_column = 0;

        self
    }

    pub fn get_next_token(&mut self) -> Token {
        if !self.has_more_tokens() {
            self.yytext = EOF;
            return self.to_token(EOF)
        }

        let str_slice = &self.string[self.cursor as usize..];

        let lex_rules_for_state = LEX_RULES_BY_START_CONDITIONS
            .get(self.get_current_state())
            .unwrap();

        for i in lex_rules_for_state {
            let i = *i as usize;

            if let Some(matched) = self._match(str_slice, &REGEX_RULES[i]) {
                // Manually advance the cursor on zero-length matches.
                if matched.len() == 0 {
                    self.cursor = self.cursor + 1;
                }

                self.yytext = matched;
                self.yyleng = matched.len();

                let token_type = self.handlers[i](self);

                // Rules returning an empty token type (e.g. whitespace) are skipped.
                if token_type.len() == 0 {
                    return self.get_next_token();
                }

                return self.to_token(token_type)
            }
        }

        if self.is_eof() {
            self.cursor = self.cursor + 1;
            self.yytext = EOF;
            return self.to_token(EOF);
        }

        self.panic_unexpected_token(
            &str_slice[0..1],
            self.current_line,
            self.current_column
        );

        unreachable!()
    }

    fn panic_unexpected_token(&self, string: &'static str, line: i32, column: i32) {
        let line_source = self.string
            .split('\n')
            .collect::<Vec<&str>>()
            [(line - 1) as usize];

        let pad = ::std::iter::repeat(" ")
            .take(column as usize)
            .collect::<String>();

        let line_data = format!("\n\n{}\n{}^\n", line_source, pad);

        panic!(
            "{} Unexpected token: \"{}\" at {}:{}.",
            line_data,
            string,
            line,
            column
        );
    }

    fn capture_location(&mut self, matched: &'static str) {
        let nl_re = Regex::new(r"\n").unwrap();

        self.token_start_offset = self.cursor;

        self.token_start_line = self.current_line;
        self.token_start_column = self.token_start_offset - self.current_line_begin_offset;

        // Advance the line counter for every newline inside the match.
        for cap in nl_re.captures_iter(matched) {
            self.current_line = self.current_line + 1;
            self.current_line_begin_offset = self.token_start_offset +
                cap.get(0).unwrap().start() as i32 + 1;
        }

        self.token_end_offset = self.cursor + matched.len() as i32;

        self.token_end_line = self.current_line;
        self.token_end_column = self.token_end_offset - self.current_line_begin_offset;
        self.current_column = self.token_end_column;
    }

    fn _match(&mut self, str_slice: &'static str, re: &Regex) -> Option<&'static str> {
        match re.captures(str_slice) {
            Some(caps) => {
                let matched = caps.get(0).unwrap().as_str();
                self.capture_location(matched);
                self.cursor = self.cursor + (matched.len() as i32);
                Some(matched)
            },
            None => None
        }
    }

    fn to_token(&self, token: &'static str) -> Token {
        Token {
            kind: *TOKENS_MAP.get(token).unwrap(),
            value: self.yytext,
            start_offset: self.token_start_offset,
            end_offset: self.token_end_offset,
            start_line: self.token_start_line,
            end_line: self.token_end_line,
            start_column: self.token_start_column,
            end_column: self.token_end_column,
        }
    }

    pub fn has_more_tokens(&mut self) -> bool {
        // `<=` leaves one extra position past the end for the EOF token.
        self.cursor <= self.string.len() as i32
    }

    pub fn is_eof(&mut self) -> bool {
        self.cursor == self.string.len() as i32
    }

    pub fn get_current_state(&mut self) -> &'static str {
        match self.states.last() {
            Some(last) => last,
            None => "INITIAL"
        }
    }

    pub fn push_state(&mut self, state: &'static str) -> &mut Tokenizer {
        self.states.push(state);
        self
    }

    pub fn begin(&mut self, state: &'static str) -> &mut Tokenizer {
        self.push_state(state);
        self
    }

    pub fn pop_state(&mut self) -> &'static str {
        match self.states.pop() {
            Some(top) => top,
            None => "INITIAL"
        }
    }

    // Skip whitespace: an empty token type means "no token emitted".
    fn _lex_rule0(&mut self) -> &'static str {
        return "";
    }

    fn _lex_rule1(&mut self) -> &'static str {
        return "NUMBER";
    }

    fn _lex_rule2(&mut self) -> &'static str {
        return "+";
    }

    fn _lex_rule3(&mut self) -> &'static str {
        return "*";
    }

    fn _lex_rule4(&mut self) -> &'static str {
        return "(";
    }

    fn _lex_rule5(&mut self) -> &'static str {
        return ")";
    }
}
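
// A minimal sketch (hypothetical, not emitted by the generator) showing the
// Tokenizer driven on its own: it prints each token until the EOF marker.
fn _tokenizer_demo() {
    let mut tokenizer = Tokenizer::new();
    tokenizer.init_string("2 + 3");
    loop {
        let token = tokenizer.get_next_token();
        println!("kind={} value={:?}", token.kind, token.value);
        if token.value == EOF {
            break;
        }
    }
}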
pub struct Parser {
    // Parsing stack: semantic values.
    values_stack: Vec<SV>,

    // Parsing stack: state numbers.
    states_stack: Vec<usize>,

    // Tokenizer instance.
    tokenizer: Tokenizer,

    // Semantic action handlers, one per production.
    handlers: [fn(&mut Parser) -> SV; 5],
}

impl Parser {
    pub fn new() -> Parser {
        Parser {
            values_stack: Vec::new(),
            states_stack: Vec::new(),

            tokenizer: Tokenizer::new(),

            handlers: [
                Parser::_handler0,
                Parser::_handler1,
                Parser::_handler2,
                Parser::_handler3,
                Parser::_handler4
            ],
        }
    }

    // Parses a string and returns the final semantic result.
    pub fn parse(&mut self, string: &'static str) -> TResult {
        self.tokenizer.init_string(string);

        self.values_stack.clear();

        self.states_stack.clear();
        self.states_stack.push(0);

        let mut token = self.tokenizer.get_next_token();
        let mut shifted_token = token;

        loop {
            let state = *self.states_stack.last().unwrap();
            let column = token.kind;

            if !TABLE[state].contains_key(&column) {
                self.unexpected_token(&token);
                break;
            }

            let entry = &TABLE[state][&column];

            match entry {

                // Shift a token and transit to the next state.
                &TE::Shift(next_state) => {
                    self.values_stack.push(SV::_0(token));
                    self.states_stack.push(next_state as usize);

                    shifted_token = token;
                    token = self.tokenizer.get_next_token();
                },

                // Reduce by a production, popping the RHS off the stacks.
                &TE::Reduce(production_number) => {
                    let production = PRODUCTIONS[production_number];

                    self.tokenizer.yytext = shifted_token.value;
                    self.tokenizer.yyleng = shifted_token.value.len();

                    let mut rhs_length = production[1];
                    while rhs_length > 0 {
                        self.states_stack.pop();
                        rhs_length = rhs_length - 1;
                    }

                    // Run the semantic action for this production.
                    let result_value = self.handlers[production_number](self);

                    let previous_state = *self.states_stack.last().unwrap();
                    let symbol_to_reduce_with = production[0];

                    self.values_stack.push(result_value);

                    // Goto: transit on the reduced nonterminal.
                    let next_state = match &TABLE[previous_state][&symbol_to_reduce_with] {
                        &TE::Transit(next_state) => next_state,
                        _ => unreachable!(),
                    };

                    self.states_stack.push(next_state);
                },

                // Accept: pop the start state and return the final value.
                &TE::Accept => {
                    self.states_stack.pop();

                    let parsed = self.values_stack.pop().unwrap();

                    if self.states_stack.len() != 1 ||
                        self.states_stack.pop().unwrap() != 0 ||
                        self.tokenizer.has_more_tokens() {
                        self.unexpected_token(&token);
                    }

                    let result = get_result!(parsed, _1);

                    return result;
                },

                _ => unreachable!(),
            }
        }

        unreachable!();
    }

    fn unexpected_token(&mut self, token: &Token) {
        if token.value == EOF && !self.tokenizer.has_more_tokens() {
            panic!("Unexpected end of input.");
        }
        self.tokenizer.panic_unexpected_token(token.value, token.start_line, token.start_column);
    }

    // Production 0 (augmented start): pass the result through.
    fn _handler0(&mut self) -> SV {
        let mut _1 = self.values_stack.pop().unwrap();

        let __ = _1;
        __
    }

    // Production 1: addition.
    fn _handler1(&mut self) -> SV {
        let mut _3 = pop!(self.values_stack, _1);
        self.values_stack.pop();
        let mut _1 = pop!(self.values_stack, _1);

        let __ = _1 + _3;
        SV::_1(__)
    }

    // Production 2: multiplication.
    fn _handler2(&mut self) -> SV {
        let mut _3 = pop!(self.values_stack, _1);
        self.values_stack.pop();
        let mut _1 = pop!(self.values_stack, _1);

        let __ = _1 * _3;
        SV::_1(__)
    }

    // Production 3: number literal, parsed from the matched text.
    fn _handler3(&mut self) -> SV {
        self.values_stack.pop();

        let __ = self.tokenizer.yytext.parse::<i32>().unwrap();
        SV::_1(__)
    }

    // Production 4: parenthesized expression, returning the inner value.
    fn _handler4(&mut self) -> SV {
        self.values_stack.pop();
        let mut _2 = self.values_stack.pop().unwrap();
        self.values_stack.pop();

        let __ = _2;
        __
    }
}
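
// A minimal sketch of driving the parser (a hypothetical `main`, not part of
// the generated module). The expected results follow from TABLE: state 6
// shifts on '*' but reduces on '+', so '*' binds tighter than '+'.
fn main() {
    let mut parser = Parser::new();

    assert_eq!(parser.parse("2 + 3 * 4"), 14);
    assert_eq!(parser.parse("(2 + 3) * 4"), 20);

    println!("All assertions passed.");
}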