rscel/compiler/
compiler.rs

1use std::collections::HashMap;
2
3mod pattern_utils;
4
5use pattern_utils::PrefixPattern;
6
7use super::{
8    ast_node::AstNode,
9    compiled_prog::{CompiledProg, NodeValue, PreResolvedCodePoint},
10    grammar::*,
11    source_range::SourceRange,
12    syntax_error::SyntaxError,
13    tokenizer::{TokenWithLoc, Tokenizer},
14    tokens::{AsToken, FStringSegment, IntoToken, Token},
15};
16use crate::{
17    interp::{Interpreter, JmpWhen},
18    BindContext, ByteCode, CelError, CelResult, CelValue, CelValueDyn, Program, StringTokenizer,
19};
20
21use crate::compile;
22
23pub struct CelCompiler<'l> {
24    tokenizer: &'l mut dyn Tokenizer,
25    bindings: BindContext<'l>,
26
27    next_label: u32,
28}
29
30impl<'l> CelCompiler<'l> {
31    pub fn with_tokenizer(tokenizer: &'l mut dyn Tokenizer) -> Self {
32        CelCompiler {
33            tokenizer,
34            bindings: BindContext::for_compile(),
35            next_label: 0,
36        }
37    }
38
39    pub fn compile(mut self) -> CelResult<Program> {
40        let (cprog, ast) = self.parse_expression()?;
41
42        if !self.tokenizer.peek()?.is_none() {
43            return Err(SyntaxError::from_location(self.tokenizer.location())
44                .with_message(format!("Unexpected token: {:?}", self.tokenizer.peek()?))
45                .into());
46        }
47
48        let mut prog = cprog.into_program(self.tokenizer.source().to_owned());
49        prog.details_mut().add_ast(ast);
50
51        Ok(prog)
52    }
53
54    fn new_label(&mut self) -> u32 {
55        let n = self.next_label;
56        self.next_label += 1;
57        n
58    }
59
60    fn parse_expression(&mut self) -> CelResult<(CompiledProg, AstNode<Expr>)> {
61        if let Some(Token::Match) = self.tokenizer.peek()?.as_token() {
62            self.tokenizer.next()?;
63            self.parse_match_expression()
64        } else {
65            let (lhs_node, lhs_ast) = self.parse_conditional_or()?;
66
67            match self.tokenizer.peek()?.as_token() {
68                Some(Token::Question) => {
69                    self.tokenizer.next()?;
70                    self.parse_turnary_expression(lhs_node, lhs_ast)
71                }
72                _ => {
73                    let range = lhs_ast.range();
74                    Ok((
75                        CompiledProg::from_node(lhs_node),
76                        AstNode::new(Expr::Unary(Box::new(lhs_ast)), range),
77                    ))
78                }
79            }
80        }
81    }
82
83    fn parse_turnary_expression(
84        &mut self,
85        or_prog: CompiledProg,
86        or_ast: AstNode<ConditionalOr>,
87    ) -> CelResult<(CompiledProg, AstNode<Expr>)> {
88        let (expr_node, mut details) = or_prog.into_parts();
89
90        let (true_clause_node, true_clause_ast) = self.parse_conditional_or()?;
91        let (true_clause_node, true_clause_details) = true_clause_node.into_parts();
92
93        let next = self.tokenizer.next()?;
94        if next.as_token() != Some(&Token::Colon) {
95            return Err(SyntaxError::from_location(self.tokenizer.location())
96                .with_message(format!("Unexpected token {:?}, expected COLON", next))
97                .into());
98        }
99
100        let (false_clause_node, false_clause_ast) = self.parse_expression()?;
101        let (false_clause_node, false_clause_details) = false_clause_node.into_parts();
102
103        let range = or_ast.range().surrounding(false_clause_ast.range());
104
105        details.union_from(true_clause_details);
106        details.union_from(false_clause_details);
107
108        let turnary_node = if let NodeValue::ConstExpr(i) = expr_node {
109            if i.is_err() {
110                CompiledProg {
111                    inner: NodeValue::ConstExpr(i),
112                    details,
113                }
114            } else {
115                if cfg!(feature = "type_prop") {
116                    if i.is_truthy() {
117                        CompiledProg {
118                            inner: true_clause_node,
119                            details,
120                        }
121                    } else {
122                        CompiledProg {
123                            inner: false_clause_node,
124                            details,
125                        }
126                    }
127                } else {
128                    if let CelValue::Bool(b) = i {
129                        if b {
130                            CompiledProg {
131                                inner: true_clause_node,
132                                details,
133                            }
134                        } else {
135                            CompiledProg {
136                                inner: false_clause_node,
137                                details,
138                            }
139                        }
140                    } else {
141                        CompiledProg {
142                            inner: NodeValue::ConstExpr(CelValue::from_err(CelError::Value(
143                                format!("{} cannot be converted to bool", i.as_type()),
144                            ))),
145                            details,
146                        }
147                    }
148                }
149            }
150        } else {
151            let true_clause_bytecode = true_clause_node.into_bytecode();
152            let false_clause_bytecode = false_clause_node.into_bytecode();
153
154            let after_true_clause = self.new_label();
155            let end_label = self.new_label();
156
157            CompiledProg {
158                inner: NodeValue::Bytecode(
159                    expr_node
160                        .into_bytecode()
161                        .into_iter()
162                        .chain(
163                            [PreResolvedCodePoint::JmpCond {
164                                when: JmpWhen::False,
165                                label: after_true_clause,
166                            }]
167                            .into_iter(),
168                        )
169                        .chain(true_clause_bytecode.into_iter())
170                        .chain(
171                            [
172                                PreResolvedCodePoint::Jmp { label: end_label },
173                                PreResolvedCodePoint::Label(after_true_clause),
174                            ]
175                            .into_iter(),
176                        )
177                        .chain(false_clause_bytecode.into_iter())
178                        .chain([PreResolvedCodePoint::Label(end_label)].into_iter())
179                        .collect(),
180                ),
181                details,
182            }
183        };
184
185        Ok((
186            turnary_node,
187            AstNode::new(
188                Expr::Ternary {
189                    condition: Box::new(or_ast),
190                    true_clause: Box::new(true_clause_ast),
191                    false_clause: Box::new(false_clause_ast),
192                },
193                range,
194            ),
195        ))
196    }
197
198    fn parse_match_expression(&mut self) -> CelResult<(CompiledProg, AstNode<Expr>)> {
199        let (condition_node, condition_ast) = self.parse_expression()?;
200
201        let mut range = condition_ast.range();
202
203        let (node_value, mut node_details) = condition_node.into_parts();
204        let mut node_bytecode = node_value.into_bytecode();
205
206        let next = self.tokenizer.next()?;
207        if next.as_token() != Some(&Token::LBrace) {
208            return Err(SyntaxError::from_location(self.tokenizer.location())
209                .with_message(format!("Unexpected token {:?}, expected LBRACE", next))
210                .into());
211        }
212
213        let mut expressions: Vec<AstNode<MatchCase>> = Vec::new();
214
215        let mut all_parts = Vec::new();
216
217        let mut comma_seen = true;
218
219        loop {
220            // the rbrace at the end of the match
221            let rbrace = self.tokenizer.peek()?;
222            if rbrace.as_token() == Some(&Token::RBrace) {
223                range = range.surrounding(rbrace.unwrap().loc);
224                break;
225            }
226
227            if !comma_seen {
228                return Err(SyntaxError::from_location(self.tokenizer.location())
229                    .with_message(format!("Expected COMMA"))
230                    .into());
231            }
232            comma_seen = false;
233
234            // case
235            let case_token = self.tokenizer.next()?;
236            if case_token.as_token() != Some(&Token::Case) {
237                return Err(SyntaxError::from_location(self.tokenizer.location())
238                    .with_message(format!("Unexpected token {:?}, expected CASE", next))
239                    .into());
240            }
241            //pattern
242            let (pattern_prog, pattern_ast) = self.parse_match_pattern()?;
243            let (pattern_bytecode, pattern_details) = pattern_prog.into_parts();
244            let pattern_bytecode = pattern_bytecode.into_bytecode();
245
246            node_details.union_from(pattern_details);
247
248            let pattern_range = pattern_ast.range();
249
250            // colon after pattern
251            let colon_token = self.tokenizer.next()?;
252            if colon_token.as_token() != Some(&Token::Colon) {
253                return Err(SyntaxError::from_location(self.tokenizer.location())
254                    .with_message(format!("Unexpected token {:?}, expected COLON", next))
255                    .into());
256            }
257
258            // eval expression
259            let (expr_prog, expr_ast) = self.parse_expression()?;
260            let (expr_bytecode, expr_details) = expr_prog.into_parts();
261            let expr_bytecode: Vec<_> = [ByteCode::Pop.into()]
262                .into_iter()
263                .chain(expr_bytecode.into_bytecode().into_iter())
264                .collect();
265
266            node_details.union_from(expr_details);
267
268            let case_range = pattern_range.surrounding(expr_ast.range());
269
270            all_parts.push((pattern_bytecode, expr_bytecode));
271            expressions.push(AstNode::new(
272                MatchCase {
273                    pattern: pattern_ast,
274                    expr: Box::new(expr_ast),
275                },
276                case_range,
277            ));
278            //
279            // comma after pattern
280            let comma_token = self.tokenizer.peek()?;
281            if comma_token.as_token() == Some(&Token::Comma) {
282                comma_seen = true;
283                self.tokenizer.next()?;
284            }
285        }
286
287        // consume the RBRACE
288        self.tokenizer.next()?;
289
290        // After match expression label
291        let after_match_s_l = self.new_label();
292
293        for (pattern_bytecode, expr_bytecode) in all_parts.into_iter() {
294            let after_case_l = self.new_label();
295
296            node_bytecode.push(ByteCode::Dup);
297            node_bytecode.extend(pattern_bytecode.into_iter());
298            node_bytecode.push(PreResolvedCodePoint::JmpCond {
299                when: JmpWhen::False,
300                label: after_case_l,
301            });
302
303            node_bytecode.extend(expr_bytecode);
304            node_bytecode.push(PreResolvedCodePoint::Jmp {
305                label: after_match_s_l,
306            });
307            node_bytecode.push(PreResolvedCodePoint::Label(after_case_l));
308        }
309
310        node_bytecode.extend([
311            ByteCode::Pop.into(),
312            ByteCode::Push(CelValue::from_null()).into(),
313            PreResolvedCodePoint::Label(after_match_s_l),
314        ]);
315
316        Ok((
317            CompiledProg::new(NodeValue::Bytecode(node_bytecode), node_details),
318            AstNode::new(
319                Expr::Match {
320                    condition: Box::new(condition_ast),
321                    cases: expressions,
322                },
323                range,
324            ),
325        ))
326    }
327
328    fn parse_match_pattern(&mut self) -> CelResult<(CompiledProg, AstNode<MatchPattern>)> {
329        let start = self.tokenizer.location();
330        let mut prefix_pattern = PrefixPattern::Eq;
331
332        if let Some(t) = self.tokenizer.peek()? {
333            if let Token::Ident(i) = t.token() {
334                let i = i.clone();
335                if i == "_" {
336                    self.tokenizer.next()?;
337                    let range = SourceRange::new(start, self.tokenizer.location());
338
339                    return Ok((
340                        CompiledProg::with_bytecode(
341                            [
342                                ByteCode::Pop,                     // pop off the pattern value
343                                ByteCode::Push(CelValue::true_()), // push true
344                            ]
345                            .into_iter()
346                            .collect(),
347                        ),
348                        AstNode::new(
349                            MatchPattern::Any(AstNode::new(MatchAnyPattern {}, range)),
350                            range,
351                        ),
352                    ));
353                } else if self.bindings.get_type(&i).is_some() {
354                    self.tokenizer.next()?;
355                    return Ok((
356                        CompiledProg::with_bytecode(
357                            [
358                                ByteCode::Push(CelValue::Ident("type".to_owned())),
359                                ByteCode::Call(1),
360                                ByteCode::Push(CelValue::Ident(i.clone())),
361                                ByteCode::Eq,
362                            ]
363                            .into_iter()
364                            .collect(),
365                        ),
366                        AstNode::new(
367                            MatchPattern::Type(AstNode::new(
368                                MatchTypePattern::from_type_str(&i),
369                                SourceRange::new(start, self.tokenizer.location()),
370                            )),
371                            SourceRange::new(start, self.tokenizer.location()),
372                        ),
373                    ));
374                }
375            }
376
377            if let Some(token_prefix_pattern) = PrefixPattern::from_token(t.token()) {
378                self.tokenizer.next()?;
379                prefix_pattern = token_prefix_pattern;
380            }
381        }
382
383        let op_range = SourceRange::new(start, self.tokenizer.location());
384
385        let (or_prod, or_ast) = self.parse_conditional_or()?;
386        let or_details = or_prod.details().clone();
387        let mut or_bc = or_prod.into_unresolved_bytecode();
388
389        or_bc.push(prefix_pattern.as_bytecode());
390
391        Ok((
392            CompiledProg::new(NodeValue::Bytecode(or_bc), or_details),
393            AstNode::new(
394                MatchPattern::Cmp {
395                    op: AstNode::new(prefix_pattern.as_ast(), op_range),
396                    or: or_ast,
397                },
398                SourceRange::new(start, self.tokenizer.location()),
399            ),
400        ))
401    }
402
403    fn parse_conditional_or(&mut self) -> CelResult<(CompiledProg, AstNode<ConditionalOr>)> {
404        let (mut current_node, mut current_ast) = into_unary(self.parse_conditional_and()?);
405
406        let label = self.new_label();
407
408        loop {
409            if let Some(Token::OrOr) = self.tokenizer.peek()?.as_token() {
410                self.tokenizer.next()?;
411                let (rhs_node, rhs_ast) = self.parse_conditional_and()?;
412
413                let jmp_node = CompiledProg::with_code_points(vec![
414                    PreResolvedCodePoint::Bytecode(ByteCode::Test),
415                    PreResolvedCodePoint::Bytecode(ByteCode::Dup),
416                    PreResolvedCodePoint::JmpCond {
417                        when: JmpWhen::True,
418                        label,
419                    },
420                ]);
421
422                let range = current_ast.range().surrounding(rhs_ast.range());
423
424                current_ast = AstNode::new(
425                    ConditionalOr::Binary {
426                        lhs: Box::new(current_ast),
427                        rhs: rhs_ast,
428                    },
429                    range,
430                );
431                current_node = compile!(
432                    [ByteCode::Or.into()],
433                    current_node.or(&rhs_node),
434                    current_node,
435                    jmp_node,
436                    rhs_node
437                );
438            } else {
439                break;
440            }
441        }
442
443        current_node.append_if_bytecode([PreResolvedCodePoint::Label(label)]);
444
445        Ok((current_node, current_ast))
446    }
447
448    fn parse_conditional_and(&mut self) -> CelResult<(CompiledProg, AstNode<ConditionalAnd>)> {
449        let (mut current_node, mut current_ast) = into_unary(self.parse_relation()?);
450
451        let label = self.new_label();
452
453        loop {
454            if let Some(Token::AndAnd) = self.tokenizer.peek()?.as_token() {
455                self.tokenizer.next()?;
456                let (rhs_node, rhs_ast) = self.parse_relation()?;
457
458                let jmp_node = CompiledProg::with_code_points(vec![
459                    PreResolvedCodePoint::Bytecode(ByteCode::Test),
460                    PreResolvedCodePoint::Bytecode(ByteCode::Dup),
461                    PreResolvedCodePoint::JmpCond {
462                        when: JmpWhen::False,
463                        label: label,
464                    },
465                ]);
466
467                let range = current_ast.range().surrounding(rhs_ast.range());
468
469                current_ast = AstNode::new(
470                    ConditionalAnd::Binary {
471                        lhs: Box::new(current_ast),
472                        rhs: rhs_ast,
473                    },
474                    range,
475                );
476                current_node = compile!(
477                    [ByteCode::And.into()],
478                    current_node.and(rhs_node),
479                    current_node,
480                    jmp_node,
481                    rhs_node
482                );
483            } else {
484                break;
485            }
486        }
487        current_node.append_if_bytecode([PreResolvedCodePoint::Label(label)]);
488
489        Ok((current_node, current_ast))
490    }
491
492    fn parse_relation(&mut self) -> CelResult<(CompiledProg, AstNode<Relation>)> {
493        let (mut current_node, mut current_ast) = into_unary(self.parse_addition()?);
494
495        loop {
496            match self.tokenizer.peek()?.as_token() {
497                Some(Token::LessThan) => {
498                    self.tokenizer.next()?;
499
500                    let (rhs_node, rhs_ast) = self.parse_addition()?;
501                    let range = current_ast.range().surrounding(rhs_ast.range());
502
503                    current_ast = AstNode::new(
504                        Relation::Binary {
505                            lhs: Box::new(current_ast),
506                            op: Relop::Lt,
507                            rhs: rhs_ast,
508                        },
509                        range,
510                    );
511
512                    current_node = compile!(
513                        [ByteCode::Lt.into()],
514                        current_node.lt(rhs_node),
515                        current_node,
516                        rhs_node
517                    );
518                }
519                Some(Token::LessEqual) => {
520                    self.tokenizer.next()?;
521                    let (rhs_node, rhs_ast) = self.parse_addition()?;
522                    let range = current_ast.range().surrounding(rhs_ast.range());
523
524                    current_ast = AstNode::new(
525                        Relation::Binary {
526                            lhs: Box::new(current_ast),
527                            op: Relop::Le,
528                            rhs: rhs_ast,
529                        },
530                        range,
531                    );
532
533                    current_node = compile!(
534                        [ByteCode::Le.into()],
535                        current_node.le(rhs_node),
536                        current_node,
537                        rhs_node
538                    );
539                }
540                Some(Token::EqualEqual) => {
541                    self.tokenizer.next()?;
542                    let (rhs_node, rhs_ast) = self.parse_addition()?;
543                    let range = current_ast.range().surrounding(rhs_ast.range());
544
545                    current_ast = AstNode::new(
546                        Relation::Binary {
547                            lhs: Box::new(current_ast),
548                            op: Relop::Eq,
549                            rhs: rhs_ast,
550                        },
551                        range,
552                    );
553
554                    current_node = compile!(
555                        [ByteCode::Eq.into()],
556                        CelValueDyn::eq(&current_node, &rhs_node),
557                        current_node,
558                        rhs_node
559                    );
560                }
561                Some(Token::NotEqual) => {
562                    self.tokenizer.next()?;
563                    let (rhs_node, rhs_ast) = self.parse_addition()?;
564                    let range = current_ast.range().surrounding(rhs_ast.range());
565
566                    current_ast = AstNode::new(
567                        Relation::Binary {
568                            lhs: Box::new(current_ast),
569                            op: Relop::Ne,
570                            rhs: rhs_ast,
571                        },
572                        range,
573                    );
574
575                    current_node = compile!(
576                        [ByteCode::Ne.into()],
577                        current_node.neq(rhs_node),
578                        current_node,
579                        rhs_node
580                    );
581                }
582                Some(Token::GreaterEqual) => {
583                    self.tokenizer.next()?;
584                    let (rhs_node, rhs_ast) = self.parse_addition()?;
585                    let range = current_ast.range().surrounding(rhs_ast.range());
586
587                    current_ast = AstNode::new(
588                        Relation::Binary {
589                            lhs: Box::new(current_ast),
590                            op: Relop::Ge,
591                            rhs: rhs_ast,
592                        },
593                        range,
594                    );
595
596                    current_node = compile!(
597                        [ByteCode::Ge.into()],
598                        current_node.ge(rhs_node),
599                        current_node,
600                        rhs_node
601                    );
602                }
603                Some(Token::GreaterThan) => {
604                    self.tokenizer.next()?;
605                    let (rhs_node, rhs_ast) = self.parse_addition()?;
606                    let range = current_ast.range().surrounding(rhs_ast.range());
607
608                    current_ast = AstNode::new(
609                        Relation::Binary {
610                            lhs: Box::new(current_ast),
611                            op: Relop::Gt,
612                            rhs: rhs_ast,
613                        },
614                        range,
615                    );
616
617                    current_node = compile!(
618                        [ByteCode::Gt.into()],
619                        current_node.gt(rhs_node),
620                        current_node,
621                        rhs_node
622                    );
623                }
624                Some(Token::In) => {
625                    self.tokenizer.next()?;
626                    let (rhs_node, rhs_ast) = self.parse_addition()?;
627                    let range = current_ast.range().surrounding(rhs_ast.range());
628
629                    current_ast = AstNode::new(
630                        Relation::Binary {
631                            lhs: Box::new(current_ast),
632                            op: Relop::In,
633                            rhs: rhs_ast,
634                        },
635                        range,
636                    );
637                    current_node = compile!(
638                        [ByteCode::In.into()],
639                        current_node.in_(rhs_node),
640                        current_node,
641                        rhs_node
642                    )
643                }
644                _ => break,
645            }
646        }
647
648        Ok((current_node, current_ast))
649    }
650
651    fn parse_addition(&mut self) -> CelResult<(CompiledProg, AstNode<Addition>)> {
652        let (mut current_node, mut current_ast) = into_unary(self.parse_multiplication()?);
653
654        loop {
655            match self.tokenizer.peek()?.as_token() {
656                Some(Token::Add) => {
657                    self.tokenizer.next()?;
658
659                    let (rhs_node, rhs_ast) = self.parse_multiplication()?;
660                    let range = current_ast.range().surrounding(rhs_ast.range());
661
662                    current_ast = AstNode::new(
663                        Addition::Binary {
664                            lhs: Box::new(current_ast),
665                            op: AddOp::Add,
666                            rhs: rhs_ast,
667                        },
668                        range,
669                    );
670
671                    current_node = compile!(
672                        [ByteCode::Add.into()],
673                        current_node + rhs_node,
674                        current_node,
675                        rhs_node
676                    );
677                }
678                Some(Token::Minus) => {
679                    self.tokenizer.next()?;
680
681                    let (rhs_node, rhs_ast) = self.parse_multiplication()?;
682                    let range = current_ast.range().surrounding(rhs_ast.range());
683
684                    current_ast = AstNode::new(
685                        Addition::Binary {
686                            lhs: Box::new(current_ast),
687                            op: AddOp::Sub,
688                            rhs: rhs_ast,
689                        },
690                        range,
691                    );
692
693                    current_node = compile!(
694                        [ByteCode::Sub.into()],
695                        current_node - rhs_node,
696                        current_node,
697                        rhs_node
698                    );
699                }
700                _ => break,
701            }
702        }
703
704        Ok((current_node, current_ast))
705    }
706
707    fn parse_multiplication(&mut self) -> CelResult<(CompiledProg, AstNode<Multiplication>)> {
708        let (mut current_node, mut current_ast) = into_unary(self.parse_unary()?);
709
710        loop {
711            match self.tokenizer.peek()?.as_token() {
712                Some(Token::Multiply) => {
713                    self.tokenizer.next()?;
714
715                    let (rhs_node, rhs_ast) = self.parse_unary()?;
716                    let range = current_ast.range().surrounding(rhs_ast.range());
717
718                    current_ast = AstNode::new(
719                        Multiplication::Binary {
720                            lhs: Box::new(current_ast),
721                            op: MultOp::Mult,
722                            rhs: rhs_ast,
723                        },
724                        range,
725                    );
726                    current_node = compile!(
727                        [ByteCode::Mul.into()],
728                        current_node * rhs_node,
729                        current_node,
730                        rhs_node
731                    );
732                }
733                Some(Token::Divide) => {
734                    self.tokenizer.next()?;
735
736                    let (rhs_node, rhs_ast) = self.parse_unary()?;
737                    let range = current_ast.range().surrounding(rhs_ast.range());
738
739                    current_ast = AstNode::new(
740                        Multiplication::Binary {
741                            lhs: Box::new(current_ast),
742                            op: MultOp::Div,
743                            rhs: rhs_ast,
744                        },
745                        range,
746                    );
747
748                    current_node = compile!(
749                        [ByteCode::Div.into()],
750                        current_node / rhs_node,
751                        current_node,
752                        rhs_node
753                    );
754                }
755                Some(Token::Mod) => {
756                    self.tokenizer.next()?;
757
758                    let (rhs_node, rhs_ast) = self.parse_unary()?;
759                    let range = current_ast.range().surrounding(rhs_ast.range());
760
761                    current_ast = AstNode::new(
762                        Multiplication::Binary {
763                            lhs: Box::new(current_ast),
764                            op: MultOp::Mod,
765                            rhs: rhs_ast,
766                        },
767                        range,
768                    );
769
770                    current_node = compile!(
771                        [ByteCode::Mod.into()],
772                        current_node % rhs_node,
773                        current_node,
774                        rhs_node
775                    );
776                }
777                _ => break,
778            }
779        }
780
781        Ok((current_node, current_ast))
782    }
783
784    fn parse_unary(&mut self) -> CelResult<(CompiledProg, AstNode<Unary>)> {
785        match self.tokenizer.peek()?.as_token() {
786            Some(Token::Not) => {
787                let (not, not_ast) = self.parse_not_list()?;
788                let (member, member_ast) = self.parse_member()?;
789
790                let range = not_ast.range().surrounding(member_ast.range());
791
792                Ok((
793                    member.append_result(not),
794                    AstNode::new(
795                        Unary::NotMember {
796                            nots: not_ast,
797                            member: member_ast,
798                        },
799                        range,
800                    ),
801                ))
802            }
803            Some(Token::Minus) => {
804                let (neg, neg_ast) = self.parse_neg_list()?;
805                let (member, member_ast) = self.parse_member()?;
806
807                let range = member_ast.range().surrounding(neg_ast.range());
808
809                Ok((
810                    member.append_result(neg),
811                    AstNode::new(
812                        Unary::NegMember {
813                            negs: neg_ast,
814                            member: member_ast,
815                        },
816                        range,
817                    ),
818                ))
819            }
820            _ => Ok(into_unary(self.parse_member()?)),
821        }
822    }
823
824    fn parse_not_list(&mut self) -> CelResult<(CompiledProg, AstNode<NotList>)> {
825        match self.tokenizer.peek()? {
826            Some(&TokenWithLoc {
827                token: Token::Not,
828                loc,
829            }) => {
830                self.tokenizer.next()?;
831
832                let (not_list, ast) = self.parse_not_list()?;
833                let node = compile!([ByteCode::Not.into()], not_list, not_list);
834
835                let range = ast.range().surrounding(loc);
836
837                Ok((
838                    node,
839                    AstNode::new(
840                        NotList::List {
841                            tail: Box::new(ast),
842                        },
843                        range,
844                    ),
845                ))
846            }
847            _ => {
848                let start_loc = self.tokenizer.location();
849                Ok((
850                    CompiledProg::empty(),
851                    AstNode::new(NotList::EmptyList, SourceRange::new(start_loc, start_loc)),
852                ))
853            }
854        }
855    }
856
857    fn parse_neg_list(&mut self) -> CelResult<(CompiledProg, AstNode<NegList>)> {
858        match self.tokenizer.peek()? {
859            Some(&TokenWithLoc {
860                token: Token::Minus,
861                loc,
862            }) => {
863                self.tokenizer.next()?;
864
865                let (neg_list, ast) = self.parse_neg_list()?;
866                let node = compile!([ByteCode::Neg.into()], neg_list, neg_list);
867
868                let range = ast.range().surrounding(loc);
869
870                Ok((
871                    node,
872                    AstNode::new(
873                        NegList::List {
874                            tail: Box::new(ast),
875                        },
876                        range,
877                    ),
878                ))
879            }
880            _ => {
881                let start_loc = self.tokenizer.location();
882                Ok((
883                    CompiledProg::empty(),
884                    AstNode::new(NegList::EmptyList, SourceRange::new(start_loc, start_loc)),
885                ))
886            }
887        }
888    }
889
890    fn parse_member(&mut self) -> CelResult<(CompiledProg, AstNode<Member>)> {
891        let (primary_node, primary_ast) = self.parse_primary()?;
892
893        let mut member_prime_node = CompiledProg::from_node(primary_node);
894        let mut member_prime_ast: Vec<AstNode<MemberPrime>> = Vec::new();
895
896        loop {
897            match self.tokenizer.peek()? {
898                Some(&TokenWithLoc {
899                    token: Token::Dot,
900                    loc: dot_loc,
901                }) => {
902                    self.tokenizer.next()?;
903                    match self.tokenizer.next()? {
904                        Some(TokenWithLoc {
905                            token: Token::Ident(ident),
906                            loc,
907                        }) => {
908                            let res = CompiledProg::with_const(CelValue::from_ident(&ident));
909
910                            member_prime_node = CompiledProg::from_children2_w_bytecode_cannone(
911                                member_prime_node,
912                                res,
913                                vec![ByteCode::Access],
914                                |o, c| {
915                                    if let CelValue::Ident(s) = c {
916                                        // Allow for const eval for obj members in the
917                                        // off chance a user does somthing like this
918                                        // `{'a': 3}.a`. Its const value will be 3.
919                                        if o.is_obj() {
920                                            // So if this fails we should break the const
921                                            // status and let the compiler generate some
922                                            // bytecode for function discovery and such.
923                                            match o.access(&s) {
924                                                CelValue::Err(_) => None,
925                                                o => Some(o),
926                                            }
927                                        } else {
928                                            None
929                                        }
930                                    } else {
931                                        Some(CelValue::from_err(CelError::value(
932                                            "Accessor must be ident",
933                                        )))
934                                    }
935                                },
936                            );
937
938                            member_prime_ast.push(AstNode::new(
939                                MemberPrime::MemberAccess {
940                                    ident: AstNode::new(Ident(ident.clone()), loc),
941                                },
942                                dot_loc.surrounding(loc),
943                            ));
944                        }
945                        Some(other) => {
946                            return Err(SyntaxError::from_location(self.tokenizer.location())
947                                .with_message(format!("Expected IDENT got {:?}", other))
948                                .into());
949                        }
950                        None => {
951                            return Err(SyntaxError::from_location(self.tokenizer.location())
952                                .with_message("Expected IDENT got NOTHING".to_string())
953                                .into());
954                        }
955                    }
956                }
957                Some(&TokenWithLoc {
958                    token: Token::LParen,
959                    loc,
960                }) => {
961                    self.tokenizer.next()?;
962
963                    let args = self.parse_expression_list(Token::RParen)?;
964
965                    let token = self.tokenizer.next()?;
966                    if let Some(TokenWithLoc {
967                        token: Token::RParen,
968                        loc: rparen_loc,
969                    }) = token
970                    {
971                        let args_len = args.len();
972
973                        let mut args_ast = Vec::new();
974                        let mut args_node = CompiledProg::empty();
975                        // Arguments are evaluated backwards so they get popped off the stack in order
976                        for (a, ast) in args.into_iter().rev() {
977                            args_ast.push(ast);
978                            args_node =
979                                args_node.append_result(CompiledProg::with_code_points(vec![
980                                    ByteCode::Push(a.into_unresolved_bytecode().resolve().into())
981                                        .into(),
982                                ]))
983                        }
984
985                        member_prime_node = args_node
986                            .consume_child(member_prime_node)
987                            .consume_child(CompiledProg::with_code_points(vec![ByteCode::Call(
988                                args_len as u32,
989                            )
990                            .into()]));
991
992                        member_prime_node = self.check_for_const(member_prime_node);
993
994                        member_prime_ast.push(AstNode::new(
995                            MemberPrime::Call {
996                                call: AstNode::new(
997                                    ExprList { exprs: args_ast },
998                                    loc.surrounding(rparen_loc),
999                                ),
1000                            },
1001                            loc.surrounding(rparen_loc),
1002                        ));
1003                    } else {
1004                        return Err(SyntaxError::from_location(self.tokenizer.location())
1005                            .with_message(format!(
1006                                "Unexpected token {}, expected RPARAN",
1007                                &token.map_or("NOTHING".to_string(), |x| format!("{:?}", x))
1008                            ))
1009                            .into());
1010                    }
1011                }
1012                Some(&TokenWithLoc {
1013                    token: Token::LBracket,
1014                    loc,
1015                }) => {
1016                    self.tokenizer.next()?;
1017
1018                    let (index_node, index_ast) = self.parse_expression()?;
1019
1020                    match self.tokenizer.next()? {
1021                        Some(TokenWithLoc {
1022                            token: Token::RBracket,
1023                            loc: rbracket_loc,
1024                        }) => {
1025                            member_prime_node = compile!(
1026                                [ByteCode::Index.into()],
1027                                member_prime_node.index(index_node),
1028                                member_prime_node,
1029                                index_node
1030                            );
1031
1032                            member_prime_ast.push(AstNode::new(
1033                                MemberPrime::ArrayAccess { access: index_ast },
1034                                loc.surrounding(rbracket_loc),
1035                            ));
1036                        }
1037                        next_token => {
1038                            return Err(SyntaxError::from_location(self.tokenizer.location())
1039                                .with_message(format!(
1040                                    "Unexpected token {}, expected RBRACKET",
1041                                    &next_token
1042                                        .map_or("NOTHING".to_string(), |x| format!("{:?}", x))
1043                                ))
1044                                .into());
1045                        }
1046                    }
1047                }
1048                _ => break,
1049            }
1050        }
1051
1052        let mut range = primary_ast.range();
1053        for m in member_prime_ast.iter() {
1054            range = range.surrounding(m.range());
1055        }
1056
1057        Ok((
1058            member_prime_node,
1059            AstNode::new(
1060                Member {
1061                    primary: primary_ast,
1062                    member: member_prime_ast,
1063                },
1064                range,
1065            ),
1066        ))
1067    }
1068
1069    fn parse_primary(&mut self) -> CelResult<(CompiledProg, AstNode<Primary>)> {
1070        match self.tokenizer.next()? {
1071            Some(TokenWithLoc {
1072                token: Token::Ident(val),
1073                loc,
1074            }) => Ok((
1075                CompiledProg::with_code_points(vec![
1076                    ByteCode::Push(CelValue::from_ident(&val)).into()
1077                ])
1078                .add_ident(&val),
1079                AstNode::new(Primary::Ident(Ident(val.clone())), loc),
1080            )),
1081            Some(TokenWithLoc {
1082                token: Token::LParen,
1083                loc,
1084            }) => {
1085                let (expr, expr_ast) = self.parse_expression()?;
1086
1087                let next_token = self.tokenizer.next();
1088                let rparen_loc = match next_token? {
1089                    Some(TokenWithLoc {
1090                        token: Token::RParen,
1091                        loc,
1092                    }) => loc,
1093                    Some(TokenWithLoc { token, loc }) => {
1094                        return Err(CelError::syntax(
1095                            SyntaxError::from_location(loc.start())
1096                                .with_message(format!("Expected RPAREN got {:?}", token)),
1097                        ))
1098                    }
1099                    None => {
1100                        return Err(CelError::syntax(
1101                            SyntaxError::from_location(loc.start())
1102                                .with_message("Open paren!".to_owned()),
1103                        ))
1104                    }
1105                };
1106
1107                Ok((
1108                    CompiledProg::from_node(expr),
1109                    AstNode::new(Primary::Parens(expr_ast), loc.surrounding(rparen_loc)),
1110                ))
1111            }
1112            Some(TokenWithLoc {
1113                token: Token::LBracket,
1114                loc,
1115            }) => {
1116                // list construction
1117                let expr_node_list = self.parse_expression_list(Token::RBracket)?;
1118                let expr_list_len = expr_node_list.len();
1119                let (expr_list, expr_list_ast): (Vec<_>, Vec<_>) =
1120                    expr_node_list.into_iter().unzip();
1121
1122                let range = if let Some(TokenWithLoc {
1123                    token: Token::RBracket,
1124                    loc: rbracket_loc,
1125                }) = self.tokenizer.peek()?
1126                {
1127                    loc.surrounding(*rbracket_loc)
1128                } else {
1129                    return Err(SyntaxError::from_location(self.tokenizer.location())
1130                        .with_message(format!("Unexpected token, expected RBRACKET",))
1131                        .into());
1132                };
1133
1134                self.tokenizer.next()?;
1135
1136                Ok((
1137                    CompiledProg::from_children_w_bytecode(
1138                        expr_list,
1139                        vec![ByteCode::MkList(expr_list_len as u32)],
1140                        |c| c.into(),
1141                    ),
1142                    AstNode::new(
1143                        Primary::ListConstruction(AstNode::new(
1144                            ExprList {
1145                                exprs: expr_list_ast,
1146                            },
1147                            range,
1148                        )),
1149                        range,
1150                    ),
1151                ))
1152            }
1153            Some(TokenWithLoc {
1154                token: Token::LBrace,
1155                loc,
1156            }) => {
1157                // Dictionary construction
1158                let obj_init = self.parse_obj_inits()?;
1159
1160                let range = if let Some(&TokenWithLoc {
1161                    token: Token::RBrace,
1162                    loc: rbrace_loc,
1163                }) = self.tokenizer.peek()?
1164                {
1165                    self.tokenizer.next()?;
1166
1167                    loc.surrounding(rbrace_loc)
1168                } else {
1169                    return Err(SyntaxError::from_location(self.tokenizer.location())
1170                        .with_message(format!("Unexpected token, expected RBRACE",))
1171                        .into());
1172                };
1173
1174                let obj_init_len = obj_init.len();
1175                debug_assert!(obj_init_len % 2 == 0);
1176
1177                let mut init_asts = Vec::new();
1178
1179                let (compiled_children, children_ast): (Vec<_>, Vec<_>) =
1180                    obj_init.into_iter().unzip();
1181
1182                let mut children_ast_iter = children_ast.into_iter();
1183                // init is created as value then key for mkdict stack
1184                while let Some(val_ast) = children_ast_iter.next() {
1185                    let key_ast = children_ast_iter.next().unwrap();
1186
1187                    let range = key_ast.range().surrounding(val_ast.range());
1188
1189                    init_asts.push(AstNode::new(
1190                        ObjInit {
1191                            key: key_ast,
1192                            value: val_ast,
1193                        },
1194                        range,
1195                    ));
1196                }
1197
1198                let new_ast = AstNode::new(
1199                    Primary::ObjectInit(AstNode::new(ObjInits { inits: init_asts }, range)),
1200                    range,
1201                );
1202
1203                Ok((
1204                    CompiledProg::from_children_w_bytecode(
1205                        compiled_children,
1206                        vec![ByteCode::MkDict(obj_init_len as u32 / 2)],
1207                        |vals| {
1208                            let mut obj_map = HashMap::new();
1209                            for i in (0..vals.len()).step_by(2) {
1210                                let key = if let CelValue::String(ref k) = vals[i + 1] {
1211                                    k
1212                                } else {
1213                                    return CelValue::from_err(CelError::value(
1214                                        "Only strings can be object keys",
1215                                    ));
1216                                };
1217
1218                                obj_map.insert(key.clone(), vals[i].clone());
1219                            }
1220
1221                            obj_map.into()
1222                        },
1223                    ),
1224                    new_ast,
1225                ))
1226            }
1227            Some(TokenWithLoc {
1228                token: Token::UIntLit(val),
1229                loc,
1230            }) => Ok((
1231                CompiledProg::with_const(val.into()),
1232                AstNode::new(Primary::Literal(LiteralsAndKeywords::UnsignedLit(val)), loc),
1233            )),
1234            Some(TokenWithLoc {
1235                token: Token::IntLit(val),
1236                loc,
1237            }) => Ok((
1238                CompiledProg::with_const((val as i64).into()),
1239                AstNode::new(
1240                    Primary::Literal(LiteralsAndKeywords::IntegerLit(val as i64)),
1241                    loc,
1242                ),
1243            )),
1244            Some(TokenWithLoc {
1245                token: Token::FloatLit(val),
1246                loc,
1247            }) => Ok((
1248                CompiledProg::with_const((val).into()),
1249                AstNode::new(Primary::Literal(LiteralsAndKeywords::FloatingLit(val)), loc),
1250            )),
1251            Some(TokenWithLoc {
1252                token: Token::StringLit(val),
1253                loc,
1254            }) => Ok((
1255                CompiledProg::with_const(val.clone().into()),
1256                AstNode::new(
1257                    Primary::Literal(LiteralsAndKeywords::StringLit(val.clone())),
1258                    loc,
1259                ),
1260            )),
1261            Some(TokenWithLoc {
1262                token: Token::ByteStringLit(val),
1263                loc,
1264            }) => Ok((
1265                CompiledProg::with_const(val.clone().into()),
1266                AstNode::new(
1267                    Primary::Literal(LiteralsAndKeywords::ByteStringLit(val.into())),
1268                    loc,
1269                ),
1270            )),
1271            Some(TokenWithLoc {
1272                token: Token::FStringLit(segments),
1273                loc,
1274            }) => {
1275                let mut bytecode = Vec::<PreResolvedCodePoint>::new();
1276
1277                for segment in segments.iter() {
1278                    match segment {
1279                        FStringSegment::Lit(c) => {
1280                            bytecode.push(ByteCode::Push(CelValue::String(c.clone())).into())
1281                        }
1282                        FStringSegment::Expr(e) => {
1283                            let mut tok = StringTokenizer::with_input(&e);
1284                            let mut comp = CelCompiler::with_tokenizer(&mut tok);
1285
1286                            let (e, _) = comp.parse_expression()?;
1287
1288                            bytecode.push(
1289                                ByteCode::Push(CelValue::ByteCode(
1290                                    e.into_unresolved_bytecode().resolve(),
1291                                ))
1292                                .into(),
1293                            );
1294                        }
1295                    }
1296                    bytecode.push(ByteCode::Push(CelValue::Ident("string".to_string())).into());
1297                    bytecode.push(ByteCode::Call(1).into());
1298                }
1299
1300                // Reverse it so its evaluated in order on the stack
1301                bytecode.push(ByteCode::FmtString(segments.len() as u32).into());
1302
1303                Ok((
1304                    CompiledProg::with_code_points(bytecode),
1305                    AstNode::new(
1306                        Primary::Literal(LiteralsAndKeywords::FStringList(segments.clone())),
1307                        loc,
1308                    ),
1309                ))
1310            }
1311            Some(TokenWithLoc {
1312                token: Token::BoolLit(val),
1313                loc,
1314            }) => Ok((
1315                CompiledProg::with_const(val.into()),
1316                AstNode::new(Primary::Literal(LiteralsAndKeywords::BooleanLit(val)), loc),
1317            )),
1318            Some(TokenWithLoc {
1319                token: Token::Null,
1320                loc,
1321            }) => Ok((
1322                CompiledProg::with_const(CelValue::from_null()),
1323                AstNode::new(Primary::Literal(LiteralsAndKeywords::NullLit), loc),
1324            )),
1325            _ => Err(SyntaxError::from_location(self.tokenizer.location())
1326                .with_message(format!(
1327                    "unexpected {:?}! expecting PRIMARY",
1328                    self.tokenizer.peek()
1329                ))
1330                .into()),
1331        }
1332    }
1333
1334    fn parse_expression_list(
1335        &mut self,
1336        ending: Token,
1337    ) -> CelResult<Vec<(CompiledProg, AstNode<Expr>)>> {
1338        let mut exprs = Vec::new();
1339
1340        'outer: loop {
1341            match self.tokenizer.peek()?.as_token() {
1342                Some(val) => {
1343                    if *val == ending {
1344                        break 'outer;
1345                    }
1346                }
1347                None => {}
1348            }
1349
1350            let compiled = self.parse_expression()?;
1351            exprs.push(compiled);
1352
1353            match self.tokenizer.peek()?.as_token() {
1354                Some(Token::Comma) => {
1355                    self.tokenizer.next()?;
1356                    continue;
1357                }
1358                _ => break 'outer,
1359            }
1360        }
1361
1362        Ok(exprs)
1363    }
1364
1365    fn parse_obj_inits(&mut self) -> CelResult<Vec<(CompiledProg, AstNode<Expr>)>> {
1366        let mut inits = Vec::new();
1367
1368        'outer: loop {
1369            if self.tokenizer.peek()?.as_token() == Some(&Token::RBrace) {
1370                break 'outer;
1371            }
1372
1373            let compiled_key = self.parse_expression()?;
1374
1375            let next_token = self.tokenizer.next()?.into_token();
1376            if next_token != Some(Token::Colon) {
1377                return Err(SyntaxError::from_location(self.tokenizer.location())
1378                    .with_message(format!("Invalid token: expected ':' got {:?}", next_token))
1379                    .into());
1380            }
1381            // MkDict expects value then key
1382            let compiled_value = self.parse_expression()?;
1383
1384            inits.push(compiled_value);
1385            inits.push(compiled_key);
1386
1387            match self.tokenizer.peek()?.as_token() {
1388                Some(Token::Comma) => {
1389                    self.tokenizer.next()?;
1390                    continue;
1391                }
1392                _ => break 'outer,
1393            }
1394        }
1395
1396        Ok(inits)
1397    }
1398
1399    #[inline]
1400    fn check_for_const(&self, member_prime_node: CompiledProg) -> CompiledProg {
1401        let mut i = Interpreter::empty();
1402        i.add_bindings(&self.bindings);
1403        let bc = member_prime_node.into_unresolved_bytecode().resolve();
1404        let r = i.run_raw(&bc, true);
1405
1406        match r {
1407            Ok(v) => CompiledProg::with_const(v),
1408            Err(_) => CompiledProg::with_bytecode(bc),
1409        }
1410    }
1411}
1412
#[cfg(test)]
mod test {
    use test_case::test_case;

    use super::CelCompiler;
    use crate::compiler::string_tokenizer::StringTokenizer;

    // Smoke test: each expression below must compile without an error.
    #[test_case("3+1"; "addition")]
    #[test_case("(1+foo) / 23"; "with literal")]
    #[test_case("(true || false) + 23"; "with boolean")]
    #[test_case("foo.bar"; "member access")]
    #[test_case("foo[3]"; "list access")]
    #[test_case("foo.bar()"; "member call")]
    #[test_case("foo()"; "empty function call")]
    #[test_case("foo(3)"; "function call")]
    #[test_case("1"; "just 1")]
    #[test_case("foo"; "an ident")]
    #[test_case("foo.bar.baz"; "deep member access")]
    #[test_case("--foo"; "double neg")]
    #[test_case("foo || true"; "or")]
    #[test_case("int(foo.bar && foo.baz) + 4 - (8 * 7)"; "complex")]
    #[test_case("true ? 3 : 1"; "ternary")]
    #[test_case("[1, 2, 3 + 3, 4 * 2, \"fish\"]"; "list construction")]
    fn test_parser(input: &str) {
        let mut source = StringTokenizer::with_input(input);
        let compiler = CelCompiler::with_tokenizer(&mut source);

        compiler.compile().unwrap();
    }

    // A stray `)` must be rejected, and the resulting error must be displayable.
    #[test]
    fn syntax_error() {
        let mut source = StringTokenizer::with_input("3 + 4 ) - 3");

        let outcome = CelCompiler::with_tokenizer(&mut source).compile();
        assert!(outcome.is_err());

        let err = outcome.unwrap_err();
        let _ = format!("{}", err);
    }
}