badger/
parser.rs

1use lexicon::Token;
2use lexicon::Token::*;
3use tokenizer::Tokenizer;
4use tokenizer::TokenCategory;
5use grammar::*;
6use grammar::OperatorType::*;
7
/// Peeks at the upcoming token. When it matches the pattern `$p`,
/// the token is consumed and the macro evaluates to `true`;
/// otherwise nothing is consumed and it evaluates to `false`.
macro_rules! allow {
    ($parser:ident, $p:pat) => {
        if let Some(&$p) = $parser.tokenizer.peek() {
            $parser.tokenizer.next();
            true
        } else {
            false
        }
    };
}
21
/// Aborts parsing by panicking with `Unexpected token {:?}`.
///
/// * `unexpected_token!(parser)` consumes the next token from the
///   parser and reports it.
/// * `unexpected_token!(parser, token)` reports the given token.
///
/// Both forms expand to a single diverging expression (no trailing
/// semicolon), so the macro can be used directly as a `match` arm or
/// as the tail of a block of any type.
macro_rules! unexpected_token {
    ($parser:ident) => {
        unexpected_token!($parser, $parser.consume())
    };
    ($parser:ident, $token:expr) => {
        panic!("Unexpected token {:?}", $token)
    };
}
30
/// Evaluates the `$eval` expression first, then calls
/// `expect_semicolon()` on the parser's tokenizer, and finally
/// yields the value `$eval` produced.
///
/// Per the original note on this macro, the tokenizer accepts a
/// semicolon or end of program; if neither is found but a
/// LineTermination occurred on the previous token, parsing continues
/// as if a semicolon was present, and any other case causes a panic.
macro_rules! statement {
    ($parser:ident, $eval:expr) => {{
        let result = $eval;
        $parser.tokenizer.expect_semicolon();
        result
    }};
}
44
/// Expects the control character `$b1`, evaluates `$eval`, expects
/// the control character `$b2`, and yields the value of `$eval`.
macro_rules! surround {
    ($parser:ident, $b1:expr, $eval:expr, $b2:expr) => {{
        $parser.tokenizer.expect_control($b1);
        let inner = $eval;
        $parser.tokenizer.expect_control($b2);
        inner
    }};
}
53
/// Parser state: wraps a `Tokenizer` over the borrowed source text and
/// builds `grammar` nodes from the tokens it yields.
pub struct Parser<'a> {
    // Token source every parsing method pulls from.
    tokenizer: Tokenizer<'a>,
    // Set to `false` by `new`. When `true`, `return_statement` and
    // `break_statement` do not parse their optional operand.
    // NOTE(review): presumably "automatic semicolon insertion" - confirm.
    allow_asi: bool,
}
58
59impl<'a> Parser<'a> {
60    pub fn new(source: &'a str) -> Self {
61        Parser {
62            tokenizer: Tokenizer::new(source),
63            allow_asi: false,
64        }
65    }
66
67    #[inline]
68    fn consume(&mut self) -> Token {
69        self.tokenizer.next().expect("Unexpected end of program")
70    }
71
72    #[inline]
73    fn array_expression(&mut self) -> Expression {
74        Expression::Array(self.expression_list(b']'))
75    }
76
77    fn object_member_list(&mut self) -> Vec<ObjectMember> {
78        let mut list = Vec::new();
79
80        loop {
81            if self.tokenizer.allow_control() == b'}' {
82                self.tokenizer.next();
83                break;
84            }
85
86            list.push(self.object_member());
87
88            if self.tokenizer.allow_control() == b'}' {
89                self.tokenizer.next();
90                break;
91            }
92
93            self.tokenizer.expect_control(b',');
94        }
95
96        list
97    }
98
99    #[inline]
100    fn object_member(&mut self) -> ObjectMember {
101        let category = self.tokenizer.get_category();
102
103        let key = match category {
104            TokenCategory::Label => self.tokenizer.expect_identifier(),
105            TokenCategory::Other => {
106                match self.consume() {
107                    Literal(LiteralString(key)) => key,
108                    token                       => unexpected_token!(self, token),
109                }
110            },
111            TokenCategory::Control => {
112                self.tokenizer.expect_control(b'[');
113
114                let key = self.expression(0);
115
116                self.tokenizer.expect_control(b']');
117
118                return match self.tokenizer.allow_control() {
119                    b':' => {
120                        self.tokenizer.next();
121
122                        ObjectMember::Computed {
123                            key: key,
124                            value: self.expression(0),
125                        }
126                    },
127                    b'(' => {
128                        self.tokenizer.next();
129
130                        ObjectMember::ComputedMethod {
131                            name: key,
132                            params: self.parameter_list(),
133                            body: self.block_body(),
134                        }
135                    },
136                    _ => unexpected_token!(self),
137                };
138            },
139            _ => unexpected_token!(self)
140        };
141
142        match self.tokenizer.allow_control() {
143            b':' => {
144                self.tokenizer.next();
145
146                ObjectMember::Literal {
147                    key: key,
148                    value: self.expression(0),
149                }
150            }
151            b'(' => {
152                self.tokenizer.next();
153
154                ObjectMember::Method {
155                    name: key,
156                    params: self.parameter_list(),
157                    body: self.block_body(),
158                }
159            },
160            _ => ObjectMember::Shorthand {
161                key: key,
162            }
163        }
164    }
165
166    #[inline]
167    fn object_expression(&mut self) -> Expression {
168        Expression::Object(self.object_member_list())
169    }
170
171    #[inline]
172    fn block_or_statement(&mut self) -> Statement {
173        match self.tokenizer.allow_control() {
174            b'{' => {
175                self.tokenizer.next();
176
177                Statement::Block {
178                    body: self.block_body_tail()
179                }
180            },
181            _ => {
182                let token = self.consume();
183                self.expression_statement(token)
184            }
185        }
186    }
187
188    #[inline]
189    fn block_statement(&mut self) -> Statement {
190        Statement::Block {
191            body: self.block_body_tail(),
192        }
193    }
194
195    #[inline]
196    fn block_body_tail(&mut self) -> Vec<Statement> {
197        let mut body = Vec::new();
198
199        loop {
200            if self.tokenizer.allow_control() == b'}' {
201                self.tokenizer.next();
202
203                break;
204            }
205
206            body.push(
207                self.statement().expect("Unexpected end of statements block")
208            )
209        }
210
211        body
212    }
213
214    #[inline]
215    fn block_body(&mut self) -> Vec<Statement> {
216        self.tokenizer.expect_control(b'{');
217        self.block_body_tail()
218    }
219
220    fn arrow_function_expression(&mut self, p: Option<Expression>) -> Expression {
221        let params: Vec<Parameter> = match p {
222            None => Vec::new(),
223            Some(Expression::Identifier(name)) => {
224                vec![Parameter { name: name }]
225            },
226            Some(Expression::Sequence(mut list)) => {
227                list.drain(..).map(|expression| {
228                    match expression {
229                        Expression::Identifier(name) => Parameter { name: name },
230                        _ => panic!("Cannot cast {:?} to a parameter", expression),
231                    }
232                }).collect()
233            },
234            _ =>
235                panic!("Cannot cast {:?} to parameters", p),
236        };
237
238        let body = match self.tokenizer.allow_control() {
239            b'{' => {
240                self.tokenizer.next();
241
242                Statement::Block {
243                    body: self.block_body_tail()
244                }
245            }
246            _    => self.expression(0).into()
247        };
248
249        Expression::ArrowFunction {
250            params: params,
251            body: Box::new(body)
252        }
253    }
254
255    #[inline]
256    fn prefix_expression(&mut self, operator: OperatorType) -> Expression {
257        if !operator.prefix() {
258            panic!("Unexpected operator {:?}", operator);
259        }
260
261        Expression::Prefix {
262            operator: operator,
263            operand: Box::new(self.expression(15)),
264        }
265    }
266
267    #[inline]
268    fn infix_expression(&mut self, left: Expression, bp: u8, op: OperatorType) -> Expression {
269        match op {
270            Increment | Decrement => Expression::Postfix {
271                operator: op,
272                operand: Box::new(left),
273            },
274
275            Accessor => Expression::member(left, self.tokenizer.expect_identifier()),
276
277            Conditional => Expression::Conditional {
278                test: Box::new(left),
279                consequent: Box::new(self.expression(bp)),
280                alternate: {
281                    self.tokenizer.expect_control(b':');
282                    Box::new(self.expression(bp))
283                }
284            },
285
286            FatArrow => self.arrow_function_expression(Some(left)),
287
288            _ => {
289                if !op.infix() {
290                    panic!("Unexpected operator {:?}", op);
291                }
292
293                if op.assignment() {
294                    // TODO: verify that left is assignable
295                }
296
297                Expression::binary(left, op, self.expression(bp))
298            }
299        }
300    }
301
302    fn function_expression(&mut self) -> Expression {
303        let name = match self.tokenizer.peek() {
304            Some(&Identifier(ref name)) => Some(*name),
305            _                           => None
306        };
307
308        if name.is_some() {
309            self.tokenizer.next();
310        }
311
312        Expression::Function {
313            name: name,
314            params: self.parameter_list(),
315            body: self.block_body(),
316        }
317    }
318
319    #[inline]
320    fn paren_expression(&mut self) -> Expression {
321        if self.tokenizer.allow_control() == b')' {
322            self.tokenizer.next();
323
324            match self.consume() {
325                Operator(FatArrow) => {},
326                token              => unexpected_token!(self, token)
327            }
328
329            return self.arrow_function_expression(None);
330        }
331
332        let expression = self.sequence_or_expression();
333        self.tokenizer.expect_control(b')');
334
335        expression
336    }
337
338    #[inline]
339    fn sequence_or_expression_from_token(&mut self, token: Token) -> Expression {
340        let first = self.expression_from_token(token, 0);
341        self.sequence_or(first)
342    }
343
344    #[inline]
345    fn sequence_or(&mut self, first: Expression) -> Expression {
346        match self.tokenizer.allow_control() {
347            b',' => {
348                self.tokenizer.next();
349
350                let mut list = vec![first, self.expression(0)];
351
352                while self.tokenizer.allow_control() == b',' {
353                    self.tokenizer.next();
354
355                    list.push(self.expression(0));
356                }
357
358                Expression::Sequence(list)
359            },
360            _ => first
361        }
362    }
363
364    #[inline]
365    fn sequence_or_expression(&mut self) -> Expression {
366        let token = self.consume();
367        self.sequence_or_expression_from_token(token)
368    }
369
370    fn expression_list(&mut self, terminator: u8) -> Vec<Expression> {
371        let mut list = Vec::new();
372
373        loop {
374            if self.tokenizer.allow_control() == terminator {
375                self.tokenizer.next();
376                break;
377            }
378
379            list.push(self.expression(0));
380
381            if self.tokenizer.allow_control() == terminator {
382                self.tokenizer.next();
383                break;
384            }
385
386            self.tokenizer.expect_control(b',');
387        }
388
389        list
390    }
391
392    #[inline]
393    fn expression(&mut self, lbp: u8) -> Expression {
394        let token = self.consume();
395        self.expression_from_token(token, lbp)
396    }
397
398    #[inline]
399    fn expression_from_token(&mut self, token: Token, lbp: u8) -> Expression {
400        let left = match token {
401            This              => Expression::This,
402            Literal(value)    => Expression::Literal(value),
403            Identifier(value) => value.into(),
404            Operator(optype)  => self.prefix_expression(optype),
405            Control(b'(')     => self.paren_expression(),
406            Control(b'[')     => self.array_expression(),
407            Control(b'{')     => self.object_expression(),
408            Function          => self.function_expression(),
409            token             => unexpected_token!(self, token)
410        };
411
412        self.complex_expression(left, lbp)
413    }
414
415    fn complex_expression(&mut self, mut left: Expression, lbp: u8) -> Expression {
416        loop {
417            let op = match self.tokenizer.peek() {
418                Some(&Operator(ref op)) => Some(*op),
419                _                       => None,
420            };
421
422            if let Some(op) = op {
423                self.tokenizer.next();
424
425                let rbp = op.binding_power();
426
427                if lbp > rbp {
428                    break;
429                }
430
431                left = self.infix_expression(left, rbp, op);
432
433                continue;
434            }
435
436            if lbp > 0 {
437                break;
438            }
439
440            left = match self.tokenizer.allow_control() {
441                b'(' => {
442                    self.tokenizer.next();
443
444                    Expression::Call {
445                        callee: Box::new(left),
446                        arguments: self.expression_list(b')'),
447                    }
448                },
449
450                b'[' => {
451                    self.tokenizer.next();
452
453                    let property = self.sequence_or_expression();
454
455                    self.tokenizer.expect_control(b']');
456
457                    Expression::ComputedMember {
458                        object: Box::new(left),
459                        property: Box::new(property),
460                    }
461                },
462
463                _ => break
464            }
465        }
466
467        left
468    }
469
470    /// Helper for the `for` loops that doesn't consume semicolons
471    fn variable_declaration(
472        &mut self, kind: VariableDeclarationKind
473    ) -> Statement {
474        let mut declarators = Vec::new();
475
476        loop {
477            declarators.push(VariableDeclarator {
478                name: self.tokenizer.expect_identifier(),
479                value: match self.tokenizer.peek() {
480                    Some(&Operator(Assign)) => {
481                        self.tokenizer.next();
482                        Some(self.expression(0))
483                    },
484                    _ => None
485                }
486            });
487
488            if self.tokenizer.allow_control() == b',' {
489                self.tokenizer.next();
490
491                continue;
492            }
493
494            break;
495        }
496
497        Statement::VariableDeclaration {
498            kind: kind,
499            declarators: declarators,
500        }
501    }
502
503    #[inline]
504    fn variable_declaration_statement(
505        &mut self, kind: VariableDeclarationKind
506    ) -> Statement {
507        statement!(self, self.variable_declaration(kind))
508    }
509
510    #[inline]
511    fn labeled_or_expression_statement(&mut self, label: OwnedSlice) -> Statement {
512        match self.tokenizer.allow_control() {
513            b':' => {
514                self.tokenizer.next();
515
516                Statement::Labeled {
517                    label: label,
518                    body: Box::new(self.statement().expect("Expected statement")),
519                }
520            },
521            _ => {
522                let first = self.complex_expression(label.into(), 0);
523
524                statement!(self, self.sequence_or(first).into())
525            }
526        }
527    }
528
529    #[inline]
530    fn expression_statement(&mut self, token: Token) -> Statement {
531        statement!(self, self.sequence_or_expression_from_token(token).into())
532    }
533
534    #[inline]
535    fn return_statement(&mut self) -> Statement {
536        statement!(self, Statement::Return {
537            value: match self.tokenizer.peek() {
538                None                 => None,
539                Some(&Control(b';')) => None,
540                _                    => {
541                    if self.allow_asi {
542                        None
543                    } else {
544                        Some(self.sequence_or_expression())
545                    }
546                }
547            }
548        })
549    }
550
551    #[inline]
552    fn throw_statement(&mut self) -> Statement {
553        statement!(self, Statement::Throw {
554            value: self.sequence_or_expression()
555        })
556    }
557
558    #[inline]
559    fn break_statement(&mut self) -> Statement {
560        statement!(self, Statement::Break {
561            label: match self.tokenizer.peek() {
562                None                 => None,
563                Some(&Control(b';')) => None,
564                _                    => {
565                    if self.allow_asi {
566                        None
567                    } else {
568                        Some(self.tokenizer.expect_identifier())
569                    }
570                }
571            }
572        })
573    }
574
575    fn if_statement(&mut self) -> Statement {
576        let test = surround!(self, b'(', self.expression(0), b')');
577        let consequent = Box::new(self.block_or_statement());
578        let alternate = if allow!(self, Else) {
579            if allow!(self, If) {
580                Some(Box::new(self.if_statement()))
581            } else {
582                Some(Box::new(self.block_or_statement()))
583            }
584        } else {
585            None
586        };
587
588        Statement::If {
589            test: test,
590            consequent: consequent,
591            alternate: alternate,
592        }
593    }
594
595    #[inline]
596    fn while_statement(&mut self) -> Statement {
597        Statement::While {
598            test: surround!(self, b'(', self.expression(0), b')'),
599            body: Box::new(self.block_or_statement()),
600        }
601    }
602
603    #[inline]
604    fn for_statement(&mut self) -> Statement {
605        self.tokenizer.expect_control(b'(');
606
607        let init = match self.consume() {
608            Control(b';')     => None,
609
610            Declaration(kind) => Some(Box::new(self.variable_declaration(kind))),
611
612            token             => {
613                let expression = self.sequence_or_expression_from_token(token);
614
615                if let Expression::Binary {
616                    left,
617                    operator: In,
618                    right,
619                } = expression {
620                    return self.for_in_statement_from_expressions(*left, *right);
621                }
622
623                Some(Box::new(expression.into()))
624            },
625        };
626        if init.is_some() {
627            match self.consume() {
628                Operator(In)      => return self.for_in_statement(init),
629                Identifier(ident) => {
630                    let slice = ident.as_str();
631                    if slice != "of" {
632                        panic!("Unexpected identifier {}", slice);
633                    }
634                    return self.for_of_statement(init.unwrap());
635                },
636                Control(b';')     => {},
637                token             => unexpected_token!(self, token),
638            }
639        }
640
641        let test = match self.consume() {
642            Control(b';') => None,
643            token         => Some(self.sequence_or_expression_from_token(token)),
644        };
645        if !test.is_none() {
646            self.tokenizer.expect_control(b';')
647        }
648
649        let update = match self.consume() {
650            Control(b')') => None,
651            token         => Some(self.sequence_or_expression_from_token(token)),
652        };
653        if !update.is_none() {
654            self.tokenizer.expect_control(b')');
655        }
656
657        Statement::For {
658            init: init,
659            test: test,
660            update: update,
661            body: Box::new(self.block_or_statement()),
662        }
663    }
664
665    fn for_in_statement_from_expressions(
666        &mut self, left: Expression, right: Expression
667    ) -> Statement {
668        let left = Box::new(left.into());
669        self.tokenizer.expect_control(b')');
670
671        Statement::ForIn {
672            left: left,
673            right: right,
674            body: Box::new(self.block_or_statement()),
675        }
676    }
677
678    fn for_in_statement(&mut self, left: Option<Box<Statement>>) -> Statement {
679        let left = left.unwrap();
680        let right = self.sequence_or_expression();
681        self.tokenizer.expect_control(b')');
682
683        Statement::ForIn {
684            left: left,
685            right: right,
686            body: Box::new(self.block_or_statement()),
687        }
688    }
689
690    fn for_of_statement(&mut self, left: Box<Statement>) -> Statement {
691        let right = self.sequence_or_expression();
692        self.tokenizer.expect_control(b')');
693
694        Statement::ForOf {
695            left: left,
696            right: right,
697            body: Box::new(self.block_or_statement()),
698        }
699    }
700
701    fn parameter_list(&mut self) -> Vec<Parameter> {
702        let mut list = Vec::new();
703
704        loop {
705            if self.tokenizer.allow_control() == b')' {
706                self.tokenizer.next();
707                break;
708            }
709
710            list.push(self.parameter());
711
712            if self.tokenizer.allow_control() == b')' {
713                self.tokenizer.next();
714                break;
715            }
716
717            self.tokenizer.expect_control(b',');
718        }
719
720        list
721    }
722
723    #[inline]
724    fn parameter(&mut self) -> Parameter {
725        Parameter {
726            name: self.tokenizer.expect_identifier()
727        }
728    }
729
730    #[inline]
731    fn function_statement(&mut self) -> Statement {
732        let name = self.tokenizer.expect_identifier();
733
734        self.tokenizer.expect_control(b'(');
735
736        Statement::Function {
737            name: name,
738            params: self.parameter_list(),
739            body: self.block_body(),
740        }
741    }
742
743    fn class_member(&mut self, name: OwnedSlice, is_static: bool) -> ClassMember {
744        match self.tokenizer.peek() {
745            Some(&Control(b'(')) => {
746                self.tokenizer.next();
747
748                if !is_static && name.as_str() == "constructor" {
749                    ClassMember::Constructor {
750                        params: self.parameter_list(),
751                        body: self.block_body(),
752                    }
753                } else {
754                    ClassMember::Method {
755                        is_static: is_static,
756                        name: name,
757                        params: self.parameter_list(),
758                        body: self.block_body(),
759                    }
760                }
761            },
762            Some(&Operator(Assign)) => {
763                self.consume();
764                ClassMember::Property {
765                    is_static: is_static,
766                    name: name,
767                    value: self.expression(0),
768                }
769            },
770            _ => unexpected_token!(self),
771        }
772    }
773
774    #[inline]
775    fn class_statement(&mut self) -> Statement {
776        let name = self.tokenizer.expect_identifier();
777        let super_class = match self.consume() {
778            Extends => {
779                let name = self.tokenizer.expect_identifier();
780                self.tokenizer.expect_control(b'{');
781                Some(name)
782            },
783            Control(b'{') => None,
784            token         => unexpected_token!(self, token)
785        };
786
787        let mut members = Vec::new();
788
789        loop {
790            members.push(match self.consume() {
791                Identifier(name) => self.class_member(name, false),
792                Static           => {
793                    let name = self.tokenizer.expect_identifier();
794                    self.class_member(name, true)
795                },
796                Control(b';')    => continue,
797                Control(b'}')    => break,
798                token            => unexpected_token!(self, token)
799            });
800        }
801
802        Statement::Class {
803            name: name,
804            extends: super_class,
805            body: members,
806        }
807    }
808
809    fn statement(&mut self) -> Option<Statement> {
810        let token = match self.tokenizer.next() {
811            Some(token) => token,
812            _           => return None,
813        };
814
815        Some(match token {
816            Control(b';')     => return self.statement(),
817            Control(b'{')     => self.block_statement(),
818            Declaration(kind) => self.variable_declaration_statement(kind),
819            Return            => self.return_statement(),
820            Break             => self.break_statement(),
821            Function          => self.function_statement(),
822            Class             => self.class_statement(),
823            If                => self.if_statement(),
824            While             => self.while_statement(),
825            For               => self.for_statement(),
826            Identifier(label) => self.labeled_or_expression_statement(label),
827            Throw             => self.throw_statement(),
828            token             => self.expression_statement(token),
829        })
830    }
831}
832
833pub fn parse(source: String) -> Program {
834    let mut body = Vec::new();
835
836    {
837        let mut parser = Parser::new(&source);
838
839        while let Some(statement) = parser.statement() {
840            body.push(statement);
841        }
842    }
843
844    Program::new(source, body)
845}