act_file/
lib.rs

#![allow(dead_code)]
#![allow(unused_mut)]
#![allow(unreachable_code)]

extern crate regex;

#[macro_use]
extern crate lazy_static;

use regex::Regex;
use std::collections::HashMap;

/**
 * Stack value.
 */
enum SV {
    Undefined,
    _0(Token),
    _1(i32)
}

/**
 * Lex rules.
 */
static LEX_RULES: [&'static str; 6] = [
    r"^\s+",
    r"^\d+",
    r"^\+",
    r"^\*",
    r"^\(",
    r"^\)"
];

/**
 * EOF value.
 */
static EOF: &'static str = "$";

/**
 * A macro for map literals.
 *
 * hashmap!{ 1 => "one", 2 => "two" };
 */
macro_rules! hashmap(
    { $($key:expr => $value:expr),+ } => {
        {
            let mut m = ::std::collections::HashMap::new();
            $(
                m.insert($key, $value);
            )+
            m
        }
    };
);

/**
 * Unwraps an SV for the result. The result type is known from the grammar.
 */
macro_rules! get_result {
    ($r:expr, $ty:ident) => (match $r { SV::$ty(v) => v, _ => unreachable!() });
}

/**
 * Pops an SV with the needed enum variant.
 */
macro_rules! pop {
    ($s:expr, $ty:ident) => (get_result!($s.pop().unwrap(), $ty));
}

/**
 * Productions data.
 *
 * 0 - encoded non-terminal, 1 - length of RHS to pop from the stack.
 */
static PRODUCTIONS: [[i32; 2]; 5] = [
    [-1, 1],
    [0, 3],
    [0, 3],
    [0, 1],
    [0, 3]
];

/**
 * Table entry.
 */
enum TE {
    Accept,

    // Shift, and transit to the state.
    Shift(usize),

    // Reduce by a production number.
    Reduce(usize),

    // Simple state transition.
    Transit(usize),
}
98
99lazy_static! {
100    /**
101     * Lexical rules grouped by lexer state (by start condition).
102     */
103    static ref LEX_RULES_BY_START_CONDITIONS: HashMap<&'static str, Vec<i32>> = hashmap! { "INITIAL" => vec! [ 0, 1, 2, 3, 4, 5 ] };
104
105    /**
106     * Maps a string name of a token type to its encoded number (the first
107     * token number starts after all numbers for non-terminal).
108     */
109    static ref TOKENS_MAP: HashMap<&'static str, i32> = hashmap! { "+" => 1, "*" => 2, "NUMBER" => 3, "(" => 4, ")" => 5, "$" => 6 };
110
111    /**
112     * Parsing table.
113     *
114     * Vector index is the state number, value is a map
115     * from an encoded symbol to table entry (TE).
116     */
117    static ref TABLE: Vec<HashMap<i32, TE>>= vec![
118    hashmap! { 0 => TE::Transit(1), 3 => TE::Shift(2), 4 => TE::Shift(3) },
119    hashmap! { 1 => TE::Shift(4), 2 => TE::Shift(5), 6 => TE::Accept },
120    hashmap! { 1 => TE::Reduce(3), 2 => TE::Reduce(3), 5 => TE::Reduce(3), 6 => TE::Reduce(3) },
121    hashmap! { 0 => TE::Transit(8), 3 => TE::Shift(2), 4 => TE::Shift(3) },
122    hashmap! { 0 => TE::Transit(6), 3 => TE::Shift(2), 4 => TE::Shift(3) },
123    hashmap! { 0 => TE::Transit(7), 3 => TE::Shift(2), 4 => TE::Shift(3) },
124    hashmap! { 1 => TE::Reduce(1), 2 => TE::Shift(5), 5 => TE::Reduce(1), 6 => TE::Reduce(1) },
125    hashmap! { 1 => TE::Reduce(2), 2 => TE::Reduce(2), 5 => TE::Reduce(2), 6 => TE::Reduce(2) },
126    hashmap! { 1 => TE::Shift(4), 2 => TE::Shift(5), 5 => TE::Shift(9) },
127    hashmap! { 1 => TE::Reduce(4), 2 => TE::Reduce(4), 5 => TE::Reduce(4), 6 => TE::Reduce(4) }
128];
129}
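
// Reading the table (an example): in state 0, seeing NUMBER (code 3)
// shifts to state 2; after a reduce to the start non-terminal (code 0)
// the parser transits to state 1, where `$` (code 6) accepts.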

// ------------------------------------
// Module include prologue.
//
// Should include at least the result type:
//
// type TResult = <...>;
//
// Can also include parsing hooks:
//
//   fn on_parse_begin(parser: &mut Parser, string: &'static str) {
//     ...
//   }
//
//   fn on_parse_end(parser: &mut Parser, parsed: &TResult) {
//     ...
//   }
//

// Important: define the type of the parsing result:

type TResult = i32;

// ---  end of Module include ---------
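
// Example hook (hypothetical, commented out), matching the signature
// shown in the prologue above:
//
//   fn on_parse_begin(_parser: &mut Parser, string: &'static str) {
//       println!("About to parse: {}", string);
//   }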

/**
 * Generic tokenizer used by the parser in the Syntax tool.
 *
 * https://www.npmjs.com/package/syntax-cli
 */

// ------------------------------------------------------------------
// Token.

#[derive(Debug, Clone, Copy)]
struct Token {
    kind: i32,
    value: &'static str,

    start_offset: i32,
    end_offset: i32,
    start_line: i32,
    end_line: i32,
    start_column: i32,
    end_column: i32,
}

// NOTE: LEX_RULES_BY_START_CONDITIONS, and TOKENS_MAP
// are defined in the lazy_static! block in lr.templates.rs

// ------------------------------------------------------------------
// Tokenizer.

lazy_static! {
    /**
     * Pre-compiled regexes, so they are not re-parsed on every
     * `get_next_token` call.
     */
    static ref REGEX_RULES: Vec<Regex> = LEX_RULES.iter().map(|rule| Regex::new(rule).unwrap()).collect();

    /**
     * Newline regex, pre-compiled for the same reason and used by
     * `capture_location`.
     */
    static ref NL_RE: Regex = Regex::new(r"\n").unwrap();
}

struct Tokenizer {
    /**
     * Tokenizing string.
     */
    string: &'static str,

    /**
     * Cursor for current symbol.
     */
    cursor: i32,

    /**
     * States.
     */
    states: Vec<&'static str>,

    /**
     * Line-based location tracking.
     */
    current_line: i32,
    current_column: i32,
    current_line_begin_offset: i32,

    /**
     * Location data of a matched token.
     */
    token_start_offset: i32,
    token_end_offset: i32,
    token_start_line: i32,
    token_end_line: i32,
    token_start_column: i32,
    token_end_column: i32,

    /**
     * Matched text, and its length.
     */
    yytext: &'static str,
    yyleng: usize,

    handlers: [fn(&mut Tokenizer) -> &'static str; 6],
}

impl Tokenizer {

    /**
     * Creates a new Tokenizer instance.
     *
     * The same instance can then be reused by the parser
     * by calling `init_string`.
     */
    pub fn new() -> Tokenizer {
        Tokenizer {
            string: "",
            cursor: 0,

            states: Vec::new(),

            current_line: 1,
            current_column: 0,
            current_line_begin_offset: 0,

            token_start_offset: 0,
            token_end_offset: 0,
            token_start_line: 0,
            token_end_line: 0,
            token_start_column: 0,
            token_end_column: 0,

            yytext: "",
            yyleng: 0,

            handlers: [
                Tokenizer::_lex_rule0,
                Tokenizer::_lex_rule1,
                Tokenizer::_lex_rule2,
                Tokenizer::_lex_rule3,
                Tokenizer::_lex_rule4,
                Tokenizer::_lex_rule5,
            ],
        }
    }

    /**
     * Initializes a parsing string.
     */
    pub fn init_string(&mut self, string: &'static str) -> &mut Tokenizer {
        self.string = string;

        // Initialize states.
        self.states.clear();
        self.states.push("INITIAL");

        self.cursor = 0;
        self.current_line = 1;
        self.current_column = 0;
        self.current_line_begin_offset = 0;

        self.token_start_offset = 0;
        self.token_end_offset = 0;
        self.token_start_line = 0;
        self.token_end_line = 0;
        self.token_start_column = 0;
        self.token_end_column = 0;

        self
    }

    /**
     * Returns the next token.
     */
    pub fn get_next_token(&mut self) -> Token {
        if !self.has_more_tokens() {
            self.yytext = EOF;
            return self.to_token(EOF)
        }

        let str_slice = &self.string[self.cursor as usize..];

        let lex_rules_for_state = LEX_RULES_BY_START_CONDITIONS
            .get(self.get_current_state())
            .unwrap();

        for i in lex_rules_for_state {
            let i = *i as usize;

            if let Some(matched) = self._match(str_slice, &REGEX_RULES[i]) {

                // A rule matched the empty string: advance the cursor
                // so the tokenizer cannot loop forever.
                if matched.len() == 0 {
                    self.cursor += 1;
                }

                self.yytext = matched;
                self.yyleng = matched.len();

                let token_type = self.handlers[i](self);

                // "" - no token (skip, e.g. whitespace).
                if token_type.len() == 0 {
                    return self.get_next_token();
                }

                return self.to_token(token_type)
            }
        }

        if self.is_eof() {
            self.cursor += 1;
            self.yytext = EOF;
            return self.to_token(EOF);
        }

        self.panic_unexpected_token(
            &str_slice[0..1],
            self.current_line,
            self.current_column
        );

        unreachable!()
    }

    /**
     * Panics with the default "Unexpected token" message, showing the
     * offending line from the source and pointing at the bad token with
     * a ^ marker. In addition, shows the `line:column` location.
     */
    fn panic_unexpected_token(&self, string: &'static str, line: i32, column: i32) {
        let line_source = self.string
            .split('\n')
            .collect::<Vec<&str>>()
            [(line - 1) as usize];

        let pad = " ".repeat(column as usize);

        let line_data = format!("\n\n{}\n{}^\n", line_source, pad);

        panic!(
            "{} Unexpected token: \"{}\" at {}:{}.",
            line_data,
            string,
            line,
            column
        );
    }

    fn capture_location(&mut self, matched: &'static str) {
        // Absolute offsets.
        self.token_start_offset = self.cursor;

        // Line-based locations, start.
        self.token_start_line = self.current_line;
        self.token_start_column = self.token_start_offset - self.current_line_begin_offset;

        // Track each `\n` in the matched token (NL_RE is pre-compiled
        // in the lazy_static! block above).
        for nl_match in NL_RE.find_iter(matched) {
            self.current_line += 1;
            self.current_line_begin_offset = self.token_start_offset +
                nl_match.start() as i32 + 1;
        }

        self.token_end_offset = self.cursor + matched.len() as i32;

        // Line-based locations, end.
        self.token_end_line = self.current_line;
        self.token_end_column = self.token_end_offset - self.current_line_begin_offset;
        self.current_column = self.token_end_column;
    }

    fn _match(&mut self, str_slice: &'static str, re: &Regex) -> Option<&'static str> {
        match re.find(str_slice) {
            Some(m) => {
                let matched = m.as_str();
                self.capture_location(matched);
                self.cursor += matched.len() as i32;
                Some(matched)
            },
            None => None
        }
    }

    fn to_token(&self, token: &'static str) -> Token {
        Token {
            kind: *TOKENS_MAP.get(token).unwrap(),
            value: self.yytext,
            start_offset: self.token_start_offset,
            end_offset: self.token_end_offset,
            start_line: self.token_start_line,
            end_line: self.token_end_line,
            start_column: self.token_start_column,
            end_column: self.token_end_column,
        }
    }

    /**
     * Whether there are still tokens in the stream.
     */
    pub fn has_more_tokens(&mut self) -> bool {
        self.cursor <= self.string.len() as i32
    }

    /**
     * Whether the cursor is at the EOF.
     */
    pub fn is_eof(&mut self) -> bool {
        self.cursor == self.string.len() as i32
    }

    /**
     * Returns the current tokenizing state.
     */
    pub fn get_current_state(&mut self) -> &'static str {
        match self.states.last() {
            Some(last) => last,
            None => "INITIAL"
        }
    }

    /**
     * Enters a new state, pushing it on the states stack.
     */
    pub fn push_state(&mut self, state: &'static str) -> &mut Tokenizer {
        self.states.push(state);
        self
    }

    /**
     * Alias for `push_state`.
     */
    pub fn begin(&mut self, state: &'static str) -> &mut Tokenizer {
        self.push_state(state);
        self
    }

    /**
     * Exits the current state, popping it from the states stack.
     */
    pub fn pop_state(&mut self) -> &'static str {
        match self.states.pop() {
            Some(top) => top,
            None => "INITIAL"
        }
    }

    /**
     * Lex rule handlers.
     */
    fn _lex_rule0(&mut self) -> &'static str {
        /* skip whitespace */ return "";
    }

    fn _lex_rule1(&mut self) -> &'static str {
        return "NUMBER";
    }

    fn _lex_rule2(&mut self) -> &'static str {
        return "+";
    }

    fn _lex_rule3(&mut self) -> &'static str {
        return "*";
    }

    fn _lex_rule4(&mut self) -> &'static str {
        return "(";
    }

    fn _lex_rule5(&mut self) -> &'static str {
        return ")";
    }
}
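
// Standalone tokenizer usage (a sketch; the struct is crate-private,
// so this only works from inside the crate):
//
//   let mut tokenizer = Tokenizer::new();
//   tokenizer.init_string("2 + 2");
//   while tokenizer.has_more_tokens() {
//       let token = tokenizer.get_next_token();
//       // token.kind, token.value, token.start_line, ...
//   }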

// ------------------------------------------------------------------
// Parser.

/**
 * Parser.
 */
pub struct Parser {
    /**
     * Parsing stack: semantic values.
     */
    values_stack: Vec<SV>,

    /**
     * Parsing stack: state numbers.
     */
    states_stack: Vec<usize>,

    /**
     * Tokenizer instance.
     */
    tokenizer: Tokenizer,

    /**
     * Semantic action handlers.
     */
    handlers: [fn(&mut Parser) -> SV; 5],
}

impl Parser {
    /**
     * Creates a new Parser instance.
     */
    pub fn new() -> Parser {
        Parser {
            // Stacks.
            values_stack: Vec::new(),
            states_stack: Vec::new(),

            tokenizer: Tokenizer::new(),

            handlers: [
                Parser::_handler0,
                Parser::_handler1,
                Parser::_handler2,
                Parser::_handler3,
                Parser::_handler4,
            ],
        }
    }

    /**
     * Parses a string.
     */
    pub fn parse(&mut self, string: &'static str) -> TResult {
        // Initialize the tokenizer and the string.
        self.tokenizer.init_string(string);

        // Initialize the stacks.
        self.values_stack.clear();

        // Initial 0 state.
        self.states_stack.clear();
        self.states_stack.push(0);

        let mut token = self.tokenizer.get_next_token();
        let mut shifted_token = token;

        loop {
            let state = *self.states_stack.last().unwrap();
            let column = token.kind;

            if !TABLE[state].contains_key(&column) {
                self.unexpected_token(&token);
                break;
            }

            let entry = &TABLE[state][&column];

            match entry {

                // Shift a token, go to state.
                &TE::Shift(next_state) => {
                    // Push token.
                    self.values_stack.push(SV::_0(token));

                    // Push next state number: "s5" -> 5.
                    self.states_stack.push(next_state);

                    shifted_token = token;
                    token = self.tokenizer.get_next_token();
                },

                // Reduce by production.
                &TE::Reduce(production_number) => {
                    let production = PRODUCTIONS[production_number];

                    self.tokenizer.yytext = shifted_token.value;
                    self.tokenizer.yyleng = shifted_token.value.len();

                    // Pop the RHS of the production off the states stack.
                    let mut rhs_length = production[1];
                    while rhs_length > 0 {
                        self.states_stack.pop();
                        rhs_length = rhs_length - 1;
                    }

                    // Call the handler; it pops the RHS semantic values
                    // and produces the LHS value.
                    let result_value = self.handlers[production_number](self);

                    let previous_state = *self.states_stack.last().unwrap();
                    let symbol_to_reduce_with = production[0];

                    // Then push the LHS value onto the stack.
                    self.values_stack.push(result_value);

                    let next_state = match &TABLE[previous_state][&symbol_to_reduce_with] {
                        &TE::Transit(next_state) => next_state,
                        _ => unreachable!(),
                    };

                    self.states_stack.push(next_state);
                },

                // Accept the string.
                &TE::Accept => {
                    // Pop the state number.
                    self.states_stack.pop();

                    // Pop the parsed value.
                    let parsed = self.values_stack.pop().unwrap();

                    if self.states_stack.len() != 1 ||
                        self.states_stack.pop().unwrap() != 0 ||
                        self.tokenizer.has_more_tokens() {
                        self.unexpected_token(&token);
                    }

                    let result = get_result!(parsed, _1);

                    return result;
                },

                _ => unreachable!(),
            }
        }

        unreachable!();
    }

    fn unexpected_token(&mut self, token: &Token) {
        if token.value == EOF && !self.tokenizer.has_more_tokens() {
            panic!("Unexpected end of input.");
        }

        self.tokenizer.panic_unexpected_token(token.value, token.start_line, token.start_column);
    }

    fn _handler0(&mut self) -> SV {
        // Semantic values prologue.
        let _1 = self.values_stack.pop().unwrap();

        let __ = _1;
        __
    }

    fn _handler1(&mut self) -> SV {
        // Semantic values prologue.
        let _3 = pop!(self.values_stack, _1);
        self.values_stack.pop();
        let _1 = pop!(self.values_stack, _1);

        let __ = _1 + _3;
        SV::_1(__)
    }

    fn _handler2(&mut self) -> SV {
        // Semantic values prologue.
        let _3 = pop!(self.values_stack, _1);
        self.values_stack.pop();
        let _1 = pop!(self.values_stack, _1);

        let __ = _1 * _3;
        SV::_1(__)
    }

    fn _handler3(&mut self) -> SV {
        // Semantic values prologue.
        self.values_stack.pop();

        // `yytext` holds the matched NUMBER text at reduce time.
        let __ = self.tokenizer.yytext.parse::<i32>().unwrap();
        SV::_1(__)
    }

    fn _handler4(&mut self) -> SV {
        // Semantic values prologue: pop the ')' token, the inner value,
        // and the '(' token.
        self.values_stack.pop();
        let _2 = self.values_stack.pop().unwrap();
        self.values_stack.pop();

        let __ = _2;
        __
    }
}
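
// ------------------------------------------------------------------
// A minimal usage sketch (not part of the generated template). The
// parse table gives `*` higher precedence than `+`, which the
// assertions below assume.

#[cfg(test)]
mod tests {
    use super::Parser;

    #[test]
    fn parses_arithmetic() {
        assert_eq!(Parser::new().parse("2 + 2 * 2"), 6);
        assert_eq!(Parser::new().parse("(2 + 2) * 2"), 8);
    }
}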