rslua_march1917/
parser.rs

1use crate::{debuggable, error};
2
3use crate::ast::*;
4use crate::tokens::{Token, TokenType, TokenValue};
5use crate::types::Source;
6
7pub struct Parser<'a> {
8    tokens: Option<&'a Vec<Token>>,
9    current: usize,
10    debug: bool,
11}
12
/// Error produced when the token stream does not form a valid program.
///
/// The wrapped string is the fully formatted, human-readable message.
#[derive(Debug)]
pub struct SyntaxError(String);

impl std::fmt::Display for SyntaxError {
    /// Writes the stored message verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}

// Lets callers box the error or propagate it with `?` into generic error types.
impl std::error::Error for SyntaxError {}

/// Result alias used by every parsing routine in this module.
type ParseResult<T> = Result<T, SyntaxError>;
17
// Builds a syntax-error result for the parser's current token.
//
// The message contains `$msg`, the token's source line and column, and a
// "near" hint: the token's type when it carries no value, its value otherwise.
// The finished message is handed to the crate-level `error!` macro together
// with the `SyntaxError` constructor.
macro_rules! syntax_error {
    ($self:ident, $msg:expr) => {{
        let tok = &$self.tokens.unwrap()[$self.current];
        let near = if let TokenValue::None = tok.value {
            format!("{:?}", tok.t)
        } else {
            format!("{:?}", tok.value)
        };
        let error_msg = format!(
            "[syntax error] {} at line [{}:{}] near [{}]",
            $msg, tok.source.line, tok.source.col, near
        );
        error!($self, SyntaxError, error_msg)
    }};
}

// Reports that `$expected` was required at the current token and propagates
// the resulting error out of the enclosing function with `?`.
macro_rules! error_expected {
    ($self:ident, $expected:expr) => {
        syntax_error!($self, &format!("{:?} expected", $expected))?
    };
}
38
39impl<'a> Parser<'a> {
40    pub fn new() -> Self {
41        Parser {
42            tokens: None,
43            current: 0,
44            debug: false,
45        }
46    }
47
48    pub fn run(&mut self, tokens: &'a Vec<Token>) -> ParseResult<Block> {
49        self.reset();
50        self.tokens = Some(tokens);
51        self.block()
52    }
53
54    // block -> { stat [';'] }
55    fn block(&mut self) -> ParseResult<Block> {
56        let mut stats: Vec<StatInfo> = Vec::new();
57        let saved = self.current_source();
58        while !self.is_block_end() {
59            let (stat, should_break) = match self.current_token_type() {
60                TokenType::Return => (self.stat()?, true),
61                _ => (self.stat()?, false),
62            };
63            if let Some(stat) = stat {
64                let source = self.current_source() - saved;
65                stats.push(StatInfo { source, stat });
66            }
67            if should_break {
68                break;
69            }
70        }
71
72        let is_comment = |toke_type:TokenType|-> bool{
73            toke_type == TokenType::SComment||toke_type == TokenType::MComment
74        };
75        
76        while !self.is_block_end() && is_comment(self.current_token_type()) {
77            let stat = self.stat()?;
78            if let Some(stat) = stat {
79                let source = self.current_source() - saved;
80                stats.push(StatInfo{source, stat});
81            }
82        }
83
84        Ok(Block { stats })
85    }
86
87    fn stat(&mut self) -> ParseResult<Option<Stat>> {
88        let line = self.current_line();
89        let stat = match self.current_token_type() {
90            // stat -> ';' (empty stat)
91            TokenType::Semi => {
92                self.next();
93                return Ok(None);
94            }
95            // stat -> comment
96            TokenType::SComment | TokenType::MComment => {
97                let stat = Stat::CommentStat(CommentStat {
98                    is_single_line: self.current_token_type() == TokenType::SComment,
99                    comment: self.current_token().get_string(),
100                });
101                self.next();
102                stat
103            }
104            // stat -> if stat
105            TokenType::If => Stat::IfStat(self.ifstat()?),
106            // stat -> while stat
107            TokenType::While => Stat::WhileStat(self.whilestat()?),
108            // stat -> DO block END
109            TokenType::Do => {
110                self.next();
111                let block = self.block()?;
112                self.check_match(TokenType::End, TokenType::Do, line)?;
113                Stat::DoBlock(DoBlock { block })
114            }
115            // stat -> forstat
116            TokenType::For => Stat::ForStat(self.forstat()?),
117            // stat -> repeatstat
118            TokenType::Repeat => Stat::RepeatStat(self.repeatstat()?),
119            // stat -> funcstat
120            TokenType::Function => Stat::FuncStat(self.funcstat()?),
121            // stat -> localstat
122            TokenType::Local => {
123                self.next_and_skip_comment();
124                if self.test(TokenType::Function) {
125                    Stat::FuncStat(self.localfunc()?)
126                } else {
127                    Stat::LocalStat(self.localstat()?)
128                }
129            }
130            // stat -> label
131            TokenType::DbColon => {
132                self.next_and_skip_comment();
133                Stat::LabelStat(self.labelstat()?)
134            }
135            // stat -> retstat
136            TokenType::Return => {
137                self.next_and_skip_comment();
138                Stat::RetStat(self.retstat()?)
139            }
140            // stat -> breakstat
141            TokenType::Break => Stat::BreakStat(self.breakstat()?),
142            // stat -> gotostat
143            TokenType::Goto => Stat::GotoStat(self.gotostat()?),
144            // stat -> func | assignment
145            _ => self.exprstat()?,
146        };
147        Ok(Some(stat))
148    }
149
150    // ifstat -> IF cond THEN block {ELSEIF cond THEN block} [ELSE block] END
151    fn ifstat(&mut self) -> ParseResult<IfStat> {
152        let line = self.current_line();
153        let mut cond_blocks: Vec<CondBlock> = Vec::new();
154        cond_blocks.push(self.test_then_block()?);
155
156        self.skip_comment();
157        while self.current_token_type() == TokenType::ElseIf {
158            cond_blocks.push(self.test_then_block()?);
159            self.skip_comment();
160        }
161        let mut else_block = None;
162        if self.test_next(TokenType::Else) {
163            else_block = Some(self.block()?);
164        }
165        self.check_match(TokenType::End, TokenType::If, line)?;
166        Ok(IfStat {
167            cond_blocks,
168            else_block,
169        })
170    }
171
172    //  [IF | ELSEIF] cond THEN block
173    fn test_then_block(&mut self) -> ParseResult<CondBlock> {
174        self.next_and_skip_comment();
175        let cond = self.cond()?;
176        self.check_next(TokenType::Then)?;
177        let block = self.block()?;
178        Ok(CondBlock { cond, block })
179    }
180
181    // whilestat -> WHILE cond DO block END
182    fn whilestat(&mut self) -> ParseResult<WhileStat> {
183        let line = self.current_line();
184        self.next_and_skip_comment();
185        let cond = self.cond()?;
186        self.check_next(TokenType::Do)?;
187        let block = self.block()?;
188        self.check_match(TokenType::End, TokenType::While, line)?;
189        Ok(WhileStat { cond, block })
190    }
191
192    fn cond(&mut self) -> ParseResult<Expr> {
193        self.expr()
194    }
195
196    // forstat -> FOR (fornum | forlist) END
197    fn forstat(&mut self) -> ParseResult<ForStat> {
198        let line = self.current_line();
199        self.next_and_skip_comment();
200        let var_name = self.check_name()?;
201        let forstat = match self.current_token_type() {
202            TokenType::Assign => self.forenum(&var_name),
203            TokenType::Comma | TokenType::In => self.forlist(&var_name),
204            _ => syntax_error!(self, "'=' or 'in' expected"),
205        };
206        match forstat {
207            Ok(stat) => {
208                self.check_match(TokenType::End, TokenType::For, line)?;
209                Ok(stat)
210            }
211            Err(err) => Err(err),
212        }
213    }
214
215    // fornum -> NAME = exp1,exp1[,exp1] forbody
216    fn forenum(&mut self, var_name: &str) -> ParseResult<ForStat> {
217        self.next_and_skip_comment();
218        let init = self.expr()?;
219        self.check_next(TokenType::Comma)?;
220        self.skip_comment();
221        let limit = self.expr()?;
222        let mut step = None;
223        if self.test_next(TokenType::Comma) {
224            step = Some(self.expr()?);
225        }
226        self.check_next(TokenType::Do)?;
227        let body = self.block()?;
228        Ok(ForStat::ForNum(ForNum {
229            var: String::from(var_name),
230            init,
231            limit,
232            step,
233            body,
234        }))
235    }
236
237    // forlist -> NAME {,NAME} IN explist forbody
238    fn forlist(&mut self, var_name: &str) -> ParseResult<ForStat> {
239        let mut vars: Vec<String> = Vec::new();
240        vars.push(String::from(var_name));
241        while self.test_next(TokenType::Comma) {
242            vars.push(self.check_name()?);
243        }
244        self.check_next(TokenType::In)?;
245        self.skip_comment();
246        let exprs = self.exprlist()?;
247        self.check_next(TokenType::Do)?;
248        let body = self.block()?;
249        Ok(ForStat::ForList(ForList { vars, exprs, body }))
250    }
251
252    // repeatstat -> REPEAT block UNTIL cond
253    fn repeatstat(&mut self) -> ParseResult<RepeatStat> {
254        let line = self.current_line();
255        self.next();
256        let block = self.block()?;
257        self.check_match(TokenType::Until, TokenType::Repeat, line)?;
258        let cond = self.cond()?;
259        Ok(RepeatStat { block, cond })
260    }
261
262    // funcstat -> FUNCTION funcname body
263    fn funcstat(&mut self) -> ParseResult<FuncStat> {
264        self.next_and_skip_comment();
265        let func_name = self.funcname()?;
266        let body = self.funcbody()?;
267        Ok(FuncStat {
268            func_type: FuncType::Global,
269            func_name,
270            body,
271        })
272    }
273
274    // funcname -> NAME {'.' NAME} [':' NAME]
275    fn funcname(&mut self) -> ParseResult<FuncName> {
276        let mut fields: Vec<String> = Vec::new();
277        fields.push(self.check_name()?);
278        while self.test_next(TokenType::Attr) {
279            fields.push(self.check_name()?);
280        }
281        let mut method = None;
282        if self.test_next(TokenType::Colon) {
283            method = Some(self.check_name()?);
284        }
285        Ok(FuncName { fields, method })
286    }
287
288    // body ->  '(' parlist ')' block END
289    fn funcbody(&mut self) -> ParseResult<FuncBody> {
290        let line = self.current_line();
291        self.check_next(TokenType::Lp)?;
292        self.skip_comment();
293        let mut params: Vec<Param> = Vec::new();
294        loop {
295            if self.test(TokenType::Rp) {
296                break;
297            }
298            match self.current_token_type() {
299                TokenType::Dots => {
300                    params.push(Param::VarArg);
301                    self.next_and_skip_comment()
302                }
303                TokenType::Name => params.push(Param::Name(self.check_name()?)),
304                _ => syntax_error!(self, "<name> or '...' expected")?,
305            };
306            if !self.test_next(TokenType::Comma) {
307                break;
308            }
309        }
310        self.check_next(TokenType::Rp)?;
311        let block = self.block()?;
312        self.check_match(TokenType::End, TokenType::Function, line)?;
313        Ok(FuncBody { params, block })
314    }
315
316    // funcstat -> local FUNCTION funcname body
317    fn localfunc(&mut self) -> ParseResult<FuncStat> {
318        self.next_and_skip_comment();
319        let func_name = self.funcname()?;
320        let body = self.funcbody()?;
321        Ok(FuncStat {
322            func_type: FuncType::Local,
323            func_name,
324            body,
325        })
326    }
327
328    // stat -> LOCAL NAME {',' NAME} ['=' explist]
329    fn localstat(&mut self) -> ParseResult<LocalStat> {
330        let mut names: Vec<String> = Vec::new();
331        loop {
332            names.push(self.check_name()?);
333            if !self.test_next(TokenType::Comma) {
334                break;
335            }
336        }
337        let mut exprs: Vec<Expr> = Vec::new();
338        if self.test_next(TokenType::Assign) {
339            exprs = self.exprlist()?;
340        }
341        Ok(LocalStat { names, exprs })
342    }
343
344    // label -> '::' NAME '::'
345    fn labelstat(&mut self) -> ParseResult<LabelStat> {
346        let label = self.check_name()?;
347        self.check_next(TokenType::DbColon)?;
348        Ok(LabelStat { label })
349    }
350
351    // stat -> RETURN [explist] [';']
352    fn retstat(&mut self) -> ParseResult<RetStat> {
353        let mut exprs: Vec<Expr> = Vec::new();
354        if !self.is_block_end() && self.current_token_type() != TokenType::Semi {
355            exprs = self.exprlist()?;
356        }
357        self.test_next(TokenType::Semi);
358        Ok(RetStat { exprs })
359    }
360
361    fn breakstat(&mut self) -> ParseResult<BreakStat> {
362        self.next_and_skip_comment();
363        Ok(BreakStat {})
364    }
365
366    fn gotostat(&mut self) -> ParseResult<GotoStat> {
367        self.next_and_skip_comment();
368        let label = self.check_name()?;
369        Ok(GotoStat { label })
370    }
371
372    // stat -> func call | assignment
373    fn exprstat(&mut self) -> ParseResult<Stat> {
374        let expr = self.suffixedexpr()?;
375        if self.test(TokenType::Assign) || self.test(TokenType::Comma) {
376            Ok(Stat::AssignStat(self.assignment(expr.to_assignable())?))
377        } else {
378            Ok(Stat::CallStat(CallStat {
379                call: expr.to_assignable(),
380            }))
381        }
382    }
383
384    // assignment -> ',' suffixedexp assignment
385    // assignment -> '=' explist
386    fn assignment(&mut self, first: Assignable) -> ParseResult<AssignStat> {
387        let mut left: Vec<Assignable> = Vec::new();
388        left.push(first);
389        while self.test_next(TokenType::Comma) {
390            left.push(self.suffixedexpr()?.to_assignable())
391        }
392        self.check_next(TokenType::Assign)?;
393        self.skip_comment();
394        let right = self.exprlist()?;
395        Ok(AssignStat { left, right })
396    }
397
398    // exprlist -> expr { ',' expr }
399    fn exprlist(&mut self) -> ParseResult<Vec<Expr>> {
400        let mut exprs: Vec<Expr> = Vec::new();
401        exprs.push(self.expr()?);
402        while self.test_next(TokenType::Comma) {
403            exprs.push(self.expr()?)
404        }
405        Ok(exprs)
406    }
407
408    fn expr(&mut self) -> ParseResult<Expr> {
409        self.subexpr(0)
410    }
411
412    fn get_unop(&self) -> UnOp {
413        UnOp::from_token(self.current_token_type())
414    }
415
416    fn get_binop(&self) -> BinOp {
417        let mut current = self.current;
418        let token_type = loop { 
419            if self.tokens.unwrap()[current].is_comment() {
420                current = current + 1;
421            } else {
422                break self.tokens.unwrap()[current].t
423            }
424        };
425        BinOp::from_token(token_type)
426    }
427
428    // subexpr -> (simpleexpr | unop subexpr) { binop subexpr }
429    // where 'binop' is any binary operator with a priority higher than 'limit'
430    fn subexpr(&mut self, limit: u8) -> ParseResult<Expr> {
431        let mut left;
432        let unop = self.get_unop();
433        if unop != UnOp::None {
434            self.next_and_skip_comment();
435            let expr = Box::new(self.subexpr(unop.priority())?);
436            left = Expr::UnExpr(UnExpr { op: unop, expr });
437        } else {
438            left = self.simpleexpr()?;
439        }
440        let mut binop = self.get_binop();
441        while binop != BinOp::None && binop.priority().left > limit {
442            self.skip_comment();
443            self.next_and_skip_comment();
444            let right = self.subexpr(binop.priority().right)?;
445            left = Expr::BinExpr(BinExpr {
446                left: Box::new(left),
447                right: Box::new(right),
448                op: binop,
449            });
450            binop = self.get_binop();
451        }
452        Ok(left)
453    }
454
455    // simpleexpr -> FLT | INT | STRING | NIL | TRUE | FALSE | ... | constructor | FUNCTION body | suffixedexp
456    fn simpleexpr(&mut self) -> ParseResult<Expr> {
457        let token = self.current_token();
458        let expr = match token.t {
459            TokenType::Flt => Expr::Float(token.get_float()),
460            TokenType::Int => Expr::Int(token.get_int()),
461            TokenType::String => Expr::String(token.get_string()),
462            TokenType::Nil => Expr::Nil,
463            TokenType::True => Expr::True,
464            TokenType::False => Expr::False,
465            TokenType::Dots => Expr::VarArg,
466            TokenType::Lb => return Ok(Expr::Table(self.table()?)),
467            TokenType::Function => {
468                self.next_and_skip_comment();
469                return Ok(Expr::FuncBody(self.funcbody()?));
470            }
471            _ => return Ok(self.suffixedexpr()?),
472        };
473        self.next();
474        Ok(expr)
475    }
476
477    // suffixedexpr -> primaryexpr { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs }
478    fn suffixedexpr(&mut self) -> ParseResult<Expr> {
479        let primary = self.primaryexpr()?;
480        let mut suffixes: Vec<Suffix> = Vec::new();
481        loop {
482            match self.current_token_type() {
483                TokenType::Attr => {
484                    self.next_and_skip_comment();
485                    suffixes.push(Suffix::Attr(self.check_name()?));
486                }
487                TokenType::Ls => {
488                    let line = self.current_line();
489                    self.next_and_skip_comment();
490                    suffixes.push(Suffix::Index(self.expr()?));
491                    self.check_match(TokenType::Rs, TokenType::Ls, line)?;
492                }
493                TokenType::Colon => {
494                    self.next_and_skip_comment();
495                    let name = self.check_name()?;
496                    suffixes.push(Suffix::Method(name));
497                }
498                TokenType::Lp | TokenType::Lb | TokenType::String => {
499                    suffixes.push(Suffix::FuncArgs(self.funcargs()?));
500                }
501                _ => break,
502            }
503        }
504
505        if suffixes.is_empty() {
506            Ok(primary)
507        } else {
508            Ok(Expr::SuffixedExpr(SuffixedExpr {
509                primary: Box::new(primary),
510                suffixes,
511            }))
512        }
513    }
514
515    // primaryexp -> NAME | '(' expr ')'
516    fn primaryexpr(&mut self) -> ParseResult<Expr> {
517        let expr = match self.current_token_type() {
518            TokenType::Name => Expr::Name(self.check_name()?),
519            TokenType::Lp => {
520                let line = self.current_line();
521                self.next_and_skip_comment();
522                let expr = self.expr()?;
523                self.check_match(TokenType::Rp, TokenType::Lp, line)?;
524                Expr::ParenExpr(Box::new(expr))
525            }
526            _ => {
527                return syntax_error!(
528                    self,
529                    &format!("unexpected symbol '{:?}'", self.current_token_type())
530                )
531            }
532        };
533        Ok(expr)
534    }
535
536    // table constructor -> '{' [ field { sep field } [sep] ] '}'
537    // sep -> ',' | ';'
538    fn table(&mut self) -> ParseResult<Table> {
539        let line = self.current_line();
540        self.check_next(TokenType::Lb)?;
541        self.skip_comment();
542        let mut fields: Vec<Field> = Vec::new();
543        loop {
544            if self.test(TokenType::Rb) {
545                break;
546            }
547            fields.push(self.field()?);
548            if !self.test_next(TokenType::Comma) && !self.test_next(TokenType::Semi) {
549                break;
550            } else {
551                // TODO : reverse comment for table fields
552                self.skip_comment();
553            }
554        }
555        self.check_match(TokenType::Rb, TokenType::Lb, line)?;
556        Ok(Table { fields })
557    }
558
559    // field -> listfield | recfield
560    fn field(&mut self) -> ParseResult<Field> {
561        let field = match self.current_token_type() {
562            TokenType::Name => {
563                if self.next_token_type() == TokenType::Assign {
564                    self.recfield()?
565                } else {
566                    self.listfield()?
567                }
568            }
569            TokenType::Ls => self.recfield()?,
570            _ => self.listfield()?,
571        };
572        Ok(field)
573    }
574
575    // recfield -> (NAME | '['exp1']') = exp1
576    fn recfield(&mut self) -> ParseResult<Field> {
577        let key;
578        match self.current_token_type() {
579            TokenType::Name => key = FieldKey::Name(self.check_name()?),
580            TokenType::Ls => {
581                let line = self.current_line();
582                self.next_and_skip_comment();
583                key = FieldKey::Expr(self.expr()?);
584                self.check_match(TokenType::Rs, TokenType::Ls, line)?;
585            }
586            _ => unreachable!(),
587        };
588        self.check_next(TokenType::Assign)?;
589        self.skip_comment();
590        let value = self.expr()?;
591        Ok(Field::RecField(RecField { key, value }))
592    }
593
594    // listfield -> expr
595    fn listfield(&mut self) -> ParseResult<Field> {
596        Ok(Field::ListField(self.expr()?))
597    }
598
599    // funcargs -> '(' [ explist ] ') | table constructor | STRING
600    fn funcargs(&mut self) -> ParseResult<FuncArgs> {
601        let func_args = match self.current_token_type() {
602            TokenType::Lp => {
603                let line = self.current_line();
604                self.next_and_skip_comment();
605
606                // empty arg list
607                if self.test_next(TokenType::Rp) {
608                    return Ok(FuncArgs::Exprs(Vec::<Expr>::new()));
609                }
610
611                let exprs = self.exprlist()?;
612                self.check_match(TokenType::Rp, TokenType::Lp, line)?;
613                FuncArgs::Exprs(exprs)
614            }
615            TokenType::Lb => FuncArgs::Table(self.table()?),
616            TokenType::String => {
617                let arg = FuncArgs::String(self.current_token().get_string());
618                self.next_and_skip_comment();
619                arg
620            }
621            _ => return syntax_error!(self, "function arguments expected"),
622        };
623        Ok(func_args)
624    }
625
626    fn reset(&mut self) {
627        self.current = 0;
628    }
629
630    fn current_token(&self) -> &Token {
631        &self.tokens.unwrap()[self.current]
632    }
633
634    fn next_token(&self) -> &Token {
635        let mut current = self.current + 1;
636        while self.tokens.unwrap()[current].is_comment() {
637            current += 1;
638        }
639        &self.tokens.unwrap()[current]
640    }
641
642    fn current_token_type(&self) -> TokenType {
643        let token = self.current_token();
644        token.t
645    }
646
647    fn current_source(&self) -> Source {
648        let token = self.current_token();
649        token.source
650    }
651
652    fn current_line(&self) -> usize {
653        let token = self.current_token();
654        token.source.line
655    }
656
657    fn next_token_type(&self) -> TokenType {
658        let token = self.next_token();
659        token.t
660    }
661
662    fn next_and_skip_comment(&mut self) {
663        self.current += 1;
664        self.skip_comment();
665    }
666
667    fn next(&mut self) {
668        self.current += 1;
669    }
670
671    fn skip_comment(&mut self) -> usize {
672        let old = self.current;
673        while self.current_token().is_comment() {
674            self.current += 1;
675        }
676        old
677    }
678
679    // if reach a block end
680    fn is_block_end(&self) -> bool {
681        let token = self.current_token();
682        match token.t {
683            TokenType::Else
684            | TokenType::ElseIf
685            | TokenType::End
686            | TokenType::Until
687            | TokenType::Eos => true,
688            _ => false,
689        }
690    }
691
692    fn check_match(&mut self, end: TokenType, start: TokenType, line: usize) -> ParseResult<()> {
693        self.skip_comment();
694        if self.current_token_type() != end {
695            if line == self.current_line() {
696                error_expected!(self, end);
697            } else {
698                syntax_error!(
699                    self,
700                    &format!("{:?} expected (to close {:?} at line {})", end, start, line)
701                )?;
702            }
703        }
704        self.next();
705        Ok(())
706    }
707
708    fn test(&self, expected: TokenType) -> bool {
709        self.current_token_type() == expected
710    }
711
712    fn test_next(&mut self, expected: TokenType) -> bool {
713        let origin = self.skip_comment();
714        if self.test(expected) {
715            self.next();
716            true
717        } else {
718            self.current = origin;
719            false
720        }
721    }
722
723    fn check(&self, expected: TokenType) -> ParseResult<()> {
724        if self.current_token_type() != expected {
725            error_expected!(self, expected)
726        } else {
727            Ok(())
728        }
729    }
730
731    fn check_next(&mut self, expected: TokenType) -> ParseResult<()> {
732        self.skip_comment();
733        self.check(expected)?;
734        self.next();
735        Ok(())
736    }
737
738    fn check_name(&mut self) -> ParseResult<String> {
739        self.skip_comment();
740        self.check(TokenType::Name)?;
741        let token = self.current_token();
742        let name = match &token.value {
743            TokenValue::Str(name) => name.clone(),
744            _ => unreachable!(),
745        };
746        self.next();
747        Ok(name)
748    }
749
750    debuggable!();
751}