1use std::collections::HashMap;
2
3mod pattern_utils;
4
5use pattern_utils::PrefixPattern;
6
7use super::{
8 ast_node::AstNode,
9 compiled_prog::{CompiledProg, NodeValue, PreResolvedCodePoint},
10 grammar::*,
11 source_range::SourceRange,
12 syntax_error::SyntaxError,
13 tokenizer::{TokenWithLoc, Tokenizer},
14 tokens::{AsToken, FStringSegment, IntoToken, Token},
15};
16use crate::{
17 interp::{Interpreter, JmpWhen},
18 BindContext, ByteCode, CelError, CelResult, CelValue, CelValueDyn, Program, StringTokenizer,
19};
20
21use crate::compile;
22
/// Parser/compiler state: turns the token stream from a [`Tokenizer`] into a
/// compiled program plus its AST.
pub struct CelCompiler<'l> {
    /// Token source every `parse_*` method peeks at and consumes from.
    tokenizer: &'l mut dyn Tokenizer,
    /// Binding context built with `BindContext::for_compile()`; consulted by
    /// match-pattern parsing to decide whether an identifier names a type.
    bindings: BindContext<'l>,

    /// Next unused jump-label id; handed out and incremented by `new_label`.
    next_label: u32,
}
29
30impl<'l> CelCompiler<'l> {
31 pub fn with_tokenizer(tokenizer: &'l mut dyn Tokenizer) -> Self {
32 CelCompiler {
33 tokenizer,
34 bindings: BindContext::for_compile(),
35 next_label: 0,
36 }
37 }
38
39 pub fn compile(mut self) -> CelResult<Program> {
40 let (cprog, ast) = self.parse_expression()?;
41
42 if !self.tokenizer.peek()?.is_none() {
43 return Err(SyntaxError::from_location(self.tokenizer.location())
44 .with_message(format!("Unexpected token: {:?}", self.tokenizer.peek()?))
45 .into());
46 }
47
48 let mut prog = cprog.into_program(self.tokenizer.source().to_owned());
49 prog.details_mut().add_ast(ast);
50
51 Ok(prog)
52 }
53
54 fn new_label(&mut self) -> u32 {
55 let n = self.next_label;
56 self.next_label += 1;
57 n
58 }
59
60 fn parse_expression(&mut self) -> CelResult<(CompiledProg, AstNode<Expr>)> {
61 if let Some(Token::Match) = self.tokenizer.peek()?.as_token() {
62 self.tokenizer.next()?;
63 self.parse_match_expression()
64 } else {
65 let (lhs_node, lhs_ast) = self.parse_conditional_or()?;
66
67 match self.tokenizer.peek()?.as_token() {
68 Some(Token::Question) => {
69 self.tokenizer.next()?;
70 self.parse_turnary_expression(lhs_node, lhs_ast)
71 }
72 _ => {
73 let range = lhs_ast.range();
74 Ok((
75 CompiledProg::from_node(lhs_node),
76 AstNode::new(Expr::Unary(Box::new(lhs_ast)), range),
77 ))
78 }
79 }
80 }
81 }
82
83 fn parse_turnary_expression(
84 &mut self,
85 or_prog: CompiledProg,
86 or_ast: AstNode<ConditionalOr>,
87 ) -> CelResult<(CompiledProg, AstNode<Expr>)> {
88 let (expr_node, mut details) = or_prog.into_parts();
89
90 let (true_clause_node, true_clause_ast) = self.parse_conditional_or()?;
91 let (true_clause_node, true_clause_details) = true_clause_node.into_parts();
92
93 let next = self.tokenizer.next()?;
94 if next.as_token() != Some(&Token::Colon) {
95 return Err(SyntaxError::from_location(self.tokenizer.location())
96 .with_message(format!("Unexpected token {:?}, expected COLON", next))
97 .into());
98 }
99
100 let (false_clause_node, false_clause_ast) = self.parse_expression()?;
101 let (false_clause_node, false_clause_details) = false_clause_node.into_parts();
102
103 let range = or_ast.range().surrounding(false_clause_ast.range());
104
105 details.union_from(true_clause_details);
106 details.union_from(false_clause_details);
107
108 let turnary_node = if let NodeValue::ConstExpr(i) = expr_node {
109 if i.is_err() {
110 CompiledProg {
111 inner: NodeValue::ConstExpr(i),
112 details,
113 }
114 } else {
115 if cfg!(feature = "type_prop") {
116 if i.is_truthy() {
117 CompiledProg {
118 inner: true_clause_node,
119 details,
120 }
121 } else {
122 CompiledProg {
123 inner: false_clause_node,
124 details,
125 }
126 }
127 } else {
128 if let CelValue::Bool(b) = i {
129 if b {
130 CompiledProg {
131 inner: true_clause_node,
132 details,
133 }
134 } else {
135 CompiledProg {
136 inner: false_clause_node,
137 details,
138 }
139 }
140 } else {
141 CompiledProg {
142 inner: NodeValue::ConstExpr(CelValue::from_err(CelError::Value(
143 format!("{} cannot be converted to bool", i.as_type()),
144 ))),
145 details,
146 }
147 }
148 }
149 }
150 } else {
151 let true_clause_bytecode = true_clause_node.into_bytecode();
152 let false_clause_bytecode = false_clause_node.into_bytecode();
153
154 let after_true_clause = self.new_label();
155 let end_label = self.new_label();
156
157 CompiledProg {
158 inner: NodeValue::Bytecode(
159 expr_node
160 .into_bytecode()
161 .into_iter()
162 .chain(
163 [PreResolvedCodePoint::JmpCond {
164 when: JmpWhen::False,
165 label: after_true_clause,
166 }]
167 .into_iter(),
168 )
169 .chain(true_clause_bytecode.into_iter())
170 .chain(
171 [
172 PreResolvedCodePoint::Jmp { label: end_label },
173 PreResolvedCodePoint::Label(after_true_clause),
174 ]
175 .into_iter(),
176 )
177 .chain(false_clause_bytecode.into_iter())
178 .chain([PreResolvedCodePoint::Label(end_label)].into_iter())
179 .collect(),
180 ),
181 details,
182 }
183 };
184
185 Ok((
186 turnary_node,
187 AstNode::new(
188 Expr::Ternary {
189 condition: Box::new(or_ast),
190 true_clause: Box::new(true_clause_ast),
191 false_clause: Box::new(false_clause_ast),
192 },
193 range,
194 ),
195 ))
196 }
197
198 fn parse_match_expression(&mut self) -> CelResult<(CompiledProg, AstNode<Expr>)> {
199 let (condition_node, condition_ast) = self.parse_expression()?;
200
201 let mut range = condition_ast.range();
202
203 let (node_value, mut node_details) = condition_node.into_parts();
204 let mut node_bytecode = node_value.into_bytecode();
205
206 let next = self.tokenizer.next()?;
207 if next.as_token() != Some(&Token::LBrace) {
208 return Err(SyntaxError::from_location(self.tokenizer.location())
209 .with_message(format!("Unexpected token {:?}, expected LBRACE", next))
210 .into());
211 }
212
213 let mut expressions: Vec<AstNode<MatchCase>> = Vec::new();
214
215 let mut all_parts = Vec::new();
216
217 let mut comma_seen = true;
218
219 loop {
220 let rbrace = self.tokenizer.peek()?;
222 if rbrace.as_token() == Some(&Token::RBrace) {
223 range = range.surrounding(rbrace.unwrap().loc);
224 break;
225 }
226
227 if !comma_seen {
228 return Err(SyntaxError::from_location(self.tokenizer.location())
229 .with_message(format!("Expected COMMA"))
230 .into());
231 }
232 comma_seen = false;
233
234 let case_token = self.tokenizer.next()?;
236 if case_token.as_token() != Some(&Token::Case) {
237 return Err(SyntaxError::from_location(self.tokenizer.location())
238 .with_message(format!("Unexpected token {:?}, expected CASE", next))
239 .into());
240 }
241 let (pattern_prog, pattern_ast) = self.parse_match_pattern()?;
243 let (pattern_bytecode, pattern_details) = pattern_prog.into_parts();
244 let pattern_bytecode = pattern_bytecode.into_bytecode();
245
246 node_details.union_from(pattern_details);
247
248 let pattern_range = pattern_ast.range();
249
250 let colon_token = self.tokenizer.next()?;
252 if colon_token.as_token() != Some(&Token::Colon) {
253 return Err(SyntaxError::from_location(self.tokenizer.location())
254 .with_message(format!("Unexpected token {:?}, expected COLON", next))
255 .into());
256 }
257
258 let (expr_prog, expr_ast) = self.parse_expression()?;
260 let (expr_bytecode, expr_details) = expr_prog.into_parts();
261 let expr_bytecode: Vec<_> = [ByteCode::Pop.into()]
262 .into_iter()
263 .chain(expr_bytecode.into_bytecode().into_iter())
264 .collect();
265
266 node_details.union_from(expr_details);
267
268 let case_range = pattern_range.surrounding(expr_ast.range());
269
270 all_parts.push((pattern_bytecode, expr_bytecode));
271 expressions.push(AstNode::new(
272 MatchCase {
273 pattern: pattern_ast,
274 expr: Box::new(expr_ast),
275 },
276 case_range,
277 ));
278 let comma_token = self.tokenizer.peek()?;
281 if comma_token.as_token() == Some(&Token::Comma) {
282 comma_seen = true;
283 self.tokenizer.next()?;
284 }
285 }
286
287 self.tokenizer.next()?;
289
290 let after_match_s_l = self.new_label();
292
293 for (pattern_bytecode, expr_bytecode) in all_parts.into_iter() {
294 let after_case_l = self.new_label();
295
296 node_bytecode.push(ByteCode::Dup);
297 node_bytecode.extend(pattern_bytecode.into_iter());
298 node_bytecode.push(PreResolvedCodePoint::JmpCond {
299 when: JmpWhen::False,
300 label: after_case_l,
301 });
302
303 node_bytecode.extend(expr_bytecode);
304 node_bytecode.push(PreResolvedCodePoint::Jmp {
305 label: after_match_s_l,
306 });
307 node_bytecode.push(PreResolvedCodePoint::Label(after_case_l));
308 }
309
310 node_bytecode.extend([
311 ByteCode::Pop.into(),
312 ByteCode::Push(CelValue::from_null()).into(),
313 PreResolvedCodePoint::Label(after_match_s_l),
314 ]);
315
316 Ok((
317 CompiledProg::new(NodeValue::Bytecode(node_bytecode), node_details),
318 AstNode::new(
319 Expr::Match {
320 condition: Box::new(condition_ast),
321 cases: expressions,
322 },
323 range,
324 ),
325 ))
326 }
327
328 fn parse_match_pattern(&mut self) -> CelResult<(CompiledProg, AstNode<MatchPattern>)> {
329 let start = self.tokenizer.location();
330 let mut prefix_pattern = PrefixPattern::Eq;
331
332 if let Some(t) = self.tokenizer.peek()? {
333 if let Token::Ident(i) = t.token() {
334 let i = i.clone();
335 if i == "_" {
336 self.tokenizer.next()?;
337 let range = SourceRange::new(start, self.tokenizer.location());
338
339 return Ok((
340 CompiledProg::with_bytecode(
341 [
342 ByteCode::Pop, ByteCode::Push(CelValue::true_()), ]
345 .into_iter()
346 .collect(),
347 ),
348 AstNode::new(
349 MatchPattern::Any(AstNode::new(MatchAnyPattern {}, range)),
350 range,
351 ),
352 ));
353 } else if self.bindings.get_type(&i).is_some() {
354 self.tokenizer.next()?;
355 return Ok((
356 CompiledProg::with_bytecode(
357 [
358 ByteCode::Push(CelValue::Ident("type".to_owned())),
359 ByteCode::Call(1),
360 ByteCode::Push(CelValue::Ident(i.clone())),
361 ByteCode::Eq,
362 ]
363 .into_iter()
364 .collect(),
365 ),
366 AstNode::new(
367 MatchPattern::Type(AstNode::new(
368 MatchTypePattern::from_type_str(&i),
369 SourceRange::new(start, self.tokenizer.location()),
370 )),
371 SourceRange::new(start, self.tokenizer.location()),
372 ),
373 ));
374 }
375 }
376
377 if let Some(token_prefix_pattern) = PrefixPattern::from_token(t.token()) {
378 self.tokenizer.next()?;
379 prefix_pattern = token_prefix_pattern;
380 }
381 }
382
383 let op_range = SourceRange::new(start, self.tokenizer.location());
384
385 let (or_prod, or_ast) = self.parse_conditional_or()?;
386 let or_details = or_prod.details().clone();
387 let mut or_bc = or_prod.into_unresolved_bytecode();
388
389 or_bc.push(prefix_pattern.as_bytecode());
390
391 Ok((
392 CompiledProg::new(NodeValue::Bytecode(or_bc), or_details),
393 AstNode::new(
394 MatchPattern::Cmp {
395 op: AstNode::new(prefix_pattern.as_ast(), op_range),
396 or: or_ast,
397 },
398 SourceRange::new(start, self.tokenizer.location()),
399 ),
400 ))
401 }
402
403 fn parse_conditional_or(&mut self) -> CelResult<(CompiledProg, AstNode<ConditionalOr>)> {
404 let (mut current_node, mut current_ast) = into_unary(self.parse_conditional_and()?);
405
406 let label = self.new_label();
407
408 loop {
409 if let Some(Token::OrOr) = self.tokenizer.peek()?.as_token() {
410 self.tokenizer.next()?;
411 let (rhs_node, rhs_ast) = self.parse_conditional_and()?;
412
413 let jmp_node = CompiledProg::with_code_points(vec![
414 PreResolvedCodePoint::Bytecode(ByteCode::Test),
415 PreResolvedCodePoint::Bytecode(ByteCode::Dup),
416 PreResolvedCodePoint::JmpCond {
417 when: JmpWhen::True,
418 label,
419 },
420 ]);
421
422 let range = current_ast.range().surrounding(rhs_ast.range());
423
424 current_ast = AstNode::new(
425 ConditionalOr::Binary {
426 lhs: Box::new(current_ast),
427 rhs: rhs_ast,
428 },
429 range,
430 );
431 current_node = compile!(
432 [ByteCode::Or.into()],
433 current_node.or(&rhs_node),
434 current_node,
435 jmp_node,
436 rhs_node
437 );
438 } else {
439 break;
440 }
441 }
442
443 current_node.append_if_bytecode([PreResolvedCodePoint::Label(label)]);
444
445 Ok((current_node, current_ast))
446 }
447
448 fn parse_conditional_and(&mut self) -> CelResult<(CompiledProg, AstNode<ConditionalAnd>)> {
449 let (mut current_node, mut current_ast) = into_unary(self.parse_relation()?);
450
451 let label = self.new_label();
452
453 loop {
454 if let Some(Token::AndAnd) = self.tokenizer.peek()?.as_token() {
455 self.tokenizer.next()?;
456 let (rhs_node, rhs_ast) = self.parse_relation()?;
457
458 let jmp_node = CompiledProg::with_code_points(vec![
459 PreResolvedCodePoint::Bytecode(ByteCode::Test),
460 PreResolvedCodePoint::Bytecode(ByteCode::Dup),
461 PreResolvedCodePoint::JmpCond {
462 when: JmpWhen::False,
463 label: label,
464 },
465 ]);
466
467 let range = current_ast.range().surrounding(rhs_ast.range());
468
469 current_ast = AstNode::new(
470 ConditionalAnd::Binary {
471 lhs: Box::new(current_ast),
472 rhs: rhs_ast,
473 },
474 range,
475 );
476 current_node = compile!(
477 [ByteCode::And.into()],
478 current_node.and(rhs_node),
479 current_node,
480 jmp_node,
481 rhs_node
482 );
483 } else {
484 break;
485 }
486 }
487 current_node.append_if_bytecode([PreResolvedCodePoint::Label(label)]);
488
489 Ok((current_node, current_ast))
490 }
491
492 fn parse_relation(&mut self) -> CelResult<(CompiledProg, AstNode<Relation>)> {
493 let (mut current_node, mut current_ast) = into_unary(self.parse_addition()?);
494
495 loop {
496 match self.tokenizer.peek()?.as_token() {
497 Some(Token::LessThan) => {
498 self.tokenizer.next()?;
499
500 let (rhs_node, rhs_ast) = self.parse_addition()?;
501 let range = current_ast.range().surrounding(rhs_ast.range());
502
503 current_ast = AstNode::new(
504 Relation::Binary {
505 lhs: Box::new(current_ast),
506 op: Relop::Lt,
507 rhs: rhs_ast,
508 },
509 range,
510 );
511
512 current_node = compile!(
513 [ByteCode::Lt.into()],
514 current_node.lt(rhs_node),
515 current_node,
516 rhs_node
517 );
518 }
519 Some(Token::LessEqual) => {
520 self.tokenizer.next()?;
521 let (rhs_node, rhs_ast) = self.parse_addition()?;
522 let range = current_ast.range().surrounding(rhs_ast.range());
523
524 current_ast = AstNode::new(
525 Relation::Binary {
526 lhs: Box::new(current_ast),
527 op: Relop::Le,
528 rhs: rhs_ast,
529 },
530 range,
531 );
532
533 current_node = compile!(
534 [ByteCode::Le.into()],
535 current_node.le(rhs_node),
536 current_node,
537 rhs_node
538 );
539 }
540 Some(Token::EqualEqual) => {
541 self.tokenizer.next()?;
542 let (rhs_node, rhs_ast) = self.parse_addition()?;
543 let range = current_ast.range().surrounding(rhs_ast.range());
544
545 current_ast = AstNode::new(
546 Relation::Binary {
547 lhs: Box::new(current_ast),
548 op: Relop::Eq,
549 rhs: rhs_ast,
550 },
551 range,
552 );
553
554 current_node = compile!(
555 [ByteCode::Eq.into()],
556 CelValueDyn::eq(¤t_node, &rhs_node),
557 current_node,
558 rhs_node
559 );
560 }
561 Some(Token::NotEqual) => {
562 self.tokenizer.next()?;
563 let (rhs_node, rhs_ast) = self.parse_addition()?;
564 let range = current_ast.range().surrounding(rhs_ast.range());
565
566 current_ast = AstNode::new(
567 Relation::Binary {
568 lhs: Box::new(current_ast),
569 op: Relop::Ne,
570 rhs: rhs_ast,
571 },
572 range,
573 );
574
575 current_node = compile!(
576 [ByteCode::Ne.into()],
577 current_node.neq(rhs_node),
578 current_node,
579 rhs_node
580 );
581 }
582 Some(Token::GreaterEqual) => {
583 self.tokenizer.next()?;
584 let (rhs_node, rhs_ast) = self.parse_addition()?;
585 let range = current_ast.range().surrounding(rhs_ast.range());
586
587 current_ast = AstNode::new(
588 Relation::Binary {
589 lhs: Box::new(current_ast),
590 op: Relop::Ge,
591 rhs: rhs_ast,
592 },
593 range,
594 );
595
596 current_node = compile!(
597 [ByteCode::Ge.into()],
598 current_node.ge(rhs_node),
599 current_node,
600 rhs_node
601 );
602 }
603 Some(Token::GreaterThan) => {
604 self.tokenizer.next()?;
605 let (rhs_node, rhs_ast) = self.parse_addition()?;
606 let range = current_ast.range().surrounding(rhs_ast.range());
607
608 current_ast = AstNode::new(
609 Relation::Binary {
610 lhs: Box::new(current_ast),
611 op: Relop::Gt,
612 rhs: rhs_ast,
613 },
614 range,
615 );
616
617 current_node = compile!(
618 [ByteCode::Gt.into()],
619 current_node.gt(rhs_node),
620 current_node,
621 rhs_node
622 );
623 }
624 Some(Token::In) => {
625 self.tokenizer.next()?;
626 let (rhs_node, rhs_ast) = self.parse_addition()?;
627 let range = current_ast.range().surrounding(rhs_ast.range());
628
629 current_ast = AstNode::new(
630 Relation::Binary {
631 lhs: Box::new(current_ast),
632 op: Relop::In,
633 rhs: rhs_ast,
634 },
635 range,
636 );
637 current_node = compile!(
638 [ByteCode::In.into()],
639 current_node.in_(rhs_node),
640 current_node,
641 rhs_node
642 )
643 }
644 _ => break,
645 }
646 }
647
648 Ok((current_node, current_ast))
649 }
650
651 fn parse_addition(&mut self) -> CelResult<(CompiledProg, AstNode<Addition>)> {
652 let (mut current_node, mut current_ast) = into_unary(self.parse_multiplication()?);
653
654 loop {
655 match self.tokenizer.peek()?.as_token() {
656 Some(Token::Add) => {
657 self.tokenizer.next()?;
658
659 let (rhs_node, rhs_ast) = self.parse_multiplication()?;
660 let range = current_ast.range().surrounding(rhs_ast.range());
661
662 current_ast = AstNode::new(
663 Addition::Binary {
664 lhs: Box::new(current_ast),
665 op: AddOp::Add,
666 rhs: rhs_ast,
667 },
668 range,
669 );
670
671 current_node = compile!(
672 [ByteCode::Add.into()],
673 current_node + rhs_node,
674 current_node,
675 rhs_node
676 );
677 }
678 Some(Token::Minus) => {
679 self.tokenizer.next()?;
680
681 let (rhs_node, rhs_ast) = self.parse_multiplication()?;
682 let range = current_ast.range().surrounding(rhs_ast.range());
683
684 current_ast = AstNode::new(
685 Addition::Binary {
686 lhs: Box::new(current_ast),
687 op: AddOp::Sub,
688 rhs: rhs_ast,
689 },
690 range,
691 );
692
693 current_node = compile!(
694 [ByteCode::Sub.into()],
695 current_node - rhs_node,
696 current_node,
697 rhs_node
698 );
699 }
700 _ => break,
701 }
702 }
703
704 Ok((current_node, current_ast))
705 }
706
707 fn parse_multiplication(&mut self) -> CelResult<(CompiledProg, AstNode<Multiplication>)> {
708 let (mut current_node, mut current_ast) = into_unary(self.parse_unary()?);
709
710 loop {
711 match self.tokenizer.peek()?.as_token() {
712 Some(Token::Multiply) => {
713 self.tokenizer.next()?;
714
715 let (rhs_node, rhs_ast) = self.parse_unary()?;
716 let range = current_ast.range().surrounding(rhs_ast.range());
717
718 current_ast = AstNode::new(
719 Multiplication::Binary {
720 lhs: Box::new(current_ast),
721 op: MultOp::Mult,
722 rhs: rhs_ast,
723 },
724 range,
725 );
726 current_node = compile!(
727 [ByteCode::Mul.into()],
728 current_node * rhs_node,
729 current_node,
730 rhs_node
731 );
732 }
733 Some(Token::Divide) => {
734 self.tokenizer.next()?;
735
736 let (rhs_node, rhs_ast) = self.parse_unary()?;
737 let range = current_ast.range().surrounding(rhs_ast.range());
738
739 current_ast = AstNode::new(
740 Multiplication::Binary {
741 lhs: Box::new(current_ast),
742 op: MultOp::Div,
743 rhs: rhs_ast,
744 },
745 range,
746 );
747
748 current_node = compile!(
749 [ByteCode::Div.into()],
750 current_node / rhs_node,
751 current_node,
752 rhs_node
753 );
754 }
755 Some(Token::Mod) => {
756 self.tokenizer.next()?;
757
758 let (rhs_node, rhs_ast) = self.parse_unary()?;
759 let range = current_ast.range().surrounding(rhs_ast.range());
760
761 current_ast = AstNode::new(
762 Multiplication::Binary {
763 lhs: Box::new(current_ast),
764 op: MultOp::Mod,
765 rhs: rhs_ast,
766 },
767 range,
768 );
769
770 current_node = compile!(
771 [ByteCode::Mod.into()],
772 current_node % rhs_node,
773 current_node,
774 rhs_node
775 );
776 }
777 _ => break,
778 }
779 }
780
781 Ok((current_node, current_ast))
782 }
783
784 fn parse_unary(&mut self) -> CelResult<(CompiledProg, AstNode<Unary>)> {
785 match self.tokenizer.peek()?.as_token() {
786 Some(Token::Not) => {
787 let (not, not_ast) = self.parse_not_list()?;
788 let (member, member_ast) = self.parse_member()?;
789
790 let range = not_ast.range().surrounding(member_ast.range());
791
792 Ok((
793 member.append_result(not),
794 AstNode::new(
795 Unary::NotMember {
796 nots: not_ast,
797 member: member_ast,
798 },
799 range,
800 ),
801 ))
802 }
803 Some(Token::Minus) => {
804 let (neg, neg_ast) = self.parse_neg_list()?;
805 let (member, member_ast) = self.parse_member()?;
806
807 let range = member_ast.range().surrounding(neg_ast.range());
808
809 Ok((
810 member.append_result(neg),
811 AstNode::new(
812 Unary::NegMember {
813 negs: neg_ast,
814 member: member_ast,
815 },
816 range,
817 ),
818 ))
819 }
820 _ => Ok(into_unary(self.parse_member()?)),
821 }
822 }
823
824 fn parse_not_list(&mut self) -> CelResult<(CompiledProg, AstNode<NotList>)> {
825 match self.tokenizer.peek()? {
826 Some(&TokenWithLoc {
827 token: Token::Not,
828 loc,
829 }) => {
830 self.tokenizer.next()?;
831
832 let (not_list, ast) = self.parse_not_list()?;
833 let node = compile!([ByteCode::Not.into()], not_list, not_list);
834
835 let range = ast.range().surrounding(loc);
836
837 Ok((
838 node,
839 AstNode::new(
840 NotList::List {
841 tail: Box::new(ast),
842 },
843 range,
844 ),
845 ))
846 }
847 _ => {
848 let start_loc = self.tokenizer.location();
849 Ok((
850 CompiledProg::empty(),
851 AstNode::new(NotList::EmptyList, SourceRange::new(start_loc, start_loc)),
852 ))
853 }
854 }
855 }
856
857 fn parse_neg_list(&mut self) -> CelResult<(CompiledProg, AstNode<NegList>)> {
858 match self.tokenizer.peek()? {
859 Some(&TokenWithLoc {
860 token: Token::Minus,
861 loc,
862 }) => {
863 self.tokenizer.next()?;
864
865 let (neg_list, ast) = self.parse_neg_list()?;
866 let node = compile!([ByteCode::Neg.into()], neg_list, neg_list);
867
868 let range = ast.range().surrounding(loc);
869
870 Ok((
871 node,
872 AstNode::new(
873 NegList::List {
874 tail: Box::new(ast),
875 },
876 range,
877 ),
878 ))
879 }
880 _ => {
881 let start_loc = self.tokenizer.location();
882 Ok((
883 CompiledProg::empty(),
884 AstNode::new(NegList::EmptyList, SourceRange::new(start_loc, start_loc)),
885 ))
886 }
887 }
888 }
889
890 fn parse_member(&mut self) -> CelResult<(CompiledProg, AstNode<Member>)> {
891 let (primary_node, primary_ast) = self.parse_primary()?;
892
893 let mut member_prime_node = CompiledProg::from_node(primary_node);
894 let mut member_prime_ast: Vec<AstNode<MemberPrime>> = Vec::new();
895
896 loop {
897 match self.tokenizer.peek()? {
898 Some(&TokenWithLoc {
899 token: Token::Dot,
900 loc: dot_loc,
901 }) => {
902 self.tokenizer.next()?;
903 match self.tokenizer.next()? {
904 Some(TokenWithLoc {
905 token: Token::Ident(ident),
906 loc,
907 }) => {
908 let res = CompiledProg::with_const(CelValue::from_ident(&ident));
909
910 member_prime_node = CompiledProg::from_children2_w_bytecode_cannone(
911 member_prime_node,
912 res,
913 vec![ByteCode::Access],
914 |o, c| {
915 if let CelValue::Ident(s) = c {
916 if o.is_obj() {
920 match o.access(&s) {
924 CelValue::Err(_) => None,
925 o => Some(o),
926 }
927 } else {
928 None
929 }
930 } else {
931 Some(CelValue::from_err(CelError::value(
932 "Accessor must be ident",
933 )))
934 }
935 },
936 );
937
938 member_prime_ast.push(AstNode::new(
939 MemberPrime::MemberAccess {
940 ident: AstNode::new(Ident(ident.clone()), loc),
941 },
942 dot_loc.surrounding(loc),
943 ));
944 }
945 Some(other) => {
946 return Err(SyntaxError::from_location(self.tokenizer.location())
947 .with_message(format!("Expected IDENT got {:?}", other))
948 .into());
949 }
950 None => {
951 return Err(SyntaxError::from_location(self.tokenizer.location())
952 .with_message("Expected IDENT got NOTHING".to_string())
953 .into());
954 }
955 }
956 }
957 Some(&TokenWithLoc {
958 token: Token::LParen,
959 loc,
960 }) => {
961 self.tokenizer.next()?;
962
963 let args = self.parse_expression_list(Token::RParen)?;
964
965 let token = self.tokenizer.next()?;
966 if let Some(TokenWithLoc {
967 token: Token::RParen,
968 loc: rparen_loc,
969 }) = token
970 {
971 let args_len = args.len();
972
973 let mut args_ast = Vec::new();
974 let mut args_node = CompiledProg::empty();
975 for (a, ast) in args.into_iter().rev() {
977 args_ast.push(ast);
978 args_node =
979 args_node.append_result(CompiledProg::with_code_points(vec![
980 ByteCode::Push(a.into_unresolved_bytecode().resolve().into())
981 .into(),
982 ]))
983 }
984
985 member_prime_node = args_node
986 .consume_child(member_prime_node)
987 .consume_child(CompiledProg::with_code_points(vec![ByteCode::Call(
988 args_len as u32,
989 )
990 .into()]));
991
992 member_prime_node = self.check_for_const(member_prime_node);
993
994 member_prime_ast.push(AstNode::new(
995 MemberPrime::Call {
996 call: AstNode::new(
997 ExprList { exprs: args_ast },
998 loc.surrounding(rparen_loc),
999 ),
1000 },
1001 loc.surrounding(rparen_loc),
1002 ));
1003 } else {
1004 return Err(SyntaxError::from_location(self.tokenizer.location())
1005 .with_message(format!(
1006 "Unexpected token {}, expected RPARAN",
1007 &token.map_or("NOTHING".to_string(), |x| format!("{:?}", x))
1008 ))
1009 .into());
1010 }
1011 }
1012 Some(&TokenWithLoc {
1013 token: Token::LBracket,
1014 loc,
1015 }) => {
1016 self.tokenizer.next()?;
1017
1018 let (index_node, index_ast) = self.parse_expression()?;
1019
1020 match self.tokenizer.next()? {
1021 Some(TokenWithLoc {
1022 token: Token::RBracket,
1023 loc: rbracket_loc,
1024 }) => {
1025 member_prime_node = compile!(
1026 [ByteCode::Index.into()],
1027 member_prime_node.index(index_node),
1028 member_prime_node,
1029 index_node
1030 );
1031
1032 member_prime_ast.push(AstNode::new(
1033 MemberPrime::ArrayAccess { access: index_ast },
1034 loc.surrounding(rbracket_loc),
1035 ));
1036 }
1037 next_token => {
1038 return Err(SyntaxError::from_location(self.tokenizer.location())
1039 .with_message(format!(
1040 "Unexpected token {}, expected RBRACKET",
1041 &next_token
1042 .map_or("NOTHING".to_string(), |x| format!("{:?}", x))
1043 ))
1044 .into());
1045 }
1046 }
1047 }
1048 _ => break,
1049 }
1050 }
1051
1052 let mut range = primary_ast.range();
1053 for m in member_prime_ast.iter() {
1054 range = range.surrounding(m.range());
1055 }
1056
1057 Ok((
1058 member_prime_node,
1059 AstNode::new(
1060 Member {
1061 primary: primary_ast,
1062 member: member_prime_ast,
1063 },
1064 range,
1065 ),
1066 ))
1067 }
1068
1069 fn parse_primary(&mut self) -> CelResult<(CompiledProg, AstNode<Primary>)> {
1070 match self.tokenizer.next()? {
1071 Some(TokenWithLoc {
1072 token: Token::Ident(val),
1073 loc,
1074 }) => Ok((
1075 CompiledProg::with_code_points(vec![
1076 ByteCode::Push(CelValue::from_ident(&val)).into()
1077 ])
1078 .add_ident(&val),
1079 AstNode::new(Primary::Ident(Ident(val.clone())), loc),
1080 )),
1081 Some(TokenWithLoc {
1082 token: Token::LParen,
1083 loc,
1084 }) => {
1085 let (expr, expr_ast) = self.parse_expression()?;
1086
1087 let next_token = self.tokenizer.next();
1088 let rparen_loc = match next_token? {
1089 Some(TokenWithLoc {
1090 token: Token::RParen,
1091 loc,
1092 }) => loc,
1093 Some(TokenWithLoc { token, loc }) => {
1094 return Err(CelError::syntax(
1095 SyntaxError::from_location(loc.start())
1096 .with_message(format!("Expected RPAREN got {:?}", token)),
1097 ))
1098 }
1099 None => {
1100 return Err(CelError::syntax(
1101 SyntaxError::from_location(loc.start())
1102 .with_message("Open paren!".to_owned()),
1103 ))
1104 }
1105 };
1106
1107 Ok((
1108 CompiledProg::from_node(expr),
1109 AstNode::new(Primary::Parens(expr_ast), loc.surrounding(rparen_loc)),
1110 ))
1111 }
1112 Some(TokenWithLoc {
1113 token: Token::LBracket,
1114 loc,
1115 }) => {
1116 let expr_node_list = self.parse_expression_list(Token::RBracket)?;
1118 let expr_list_len = expr_node_list.len();
1119 let (expr_list, expr_list_ast): (Vec<_>, Vec<_>) =
1120 expr_node_list.into_iter().unzip();
1121
1122 let range = if let Some(TokenWithLoc {
1123 token: Token::RBracket,
1124 loc: rbracket_loc,
1125 }) = self.tokenizer.peek()?
1126 {
1127 loc.surrounding(*rbracket_loc)
1128 } else {
1129 return Err(SyntaxError::from_location(self.tokenizer.location())
1130 .with_message(format!("Unexpected token, expected RBRACKET",))
1131 .into());
1132 };
1133
1134 self.tokenizer.next()?;
1135
1136 Ok((
1137 CompiledProg::from_children_w_bytecode(
1138 expr_list,
1139 vec![ByteCode::MkList(expr_list_len as u32)],
1140 |c| c.into(),
1141 ),
1142 AstNode::new(
1143 Primary::ListConstruction(AstNode::new(
1144 ExprList {
1145 exprs: expr_list_ast,
1146 },
1147 range,
1148 )),
1149 range,
1150 ),
1151 ))
1152 }
1153 Some(TokenWithLoc {
1154 token: Token::LBrace,
1155 loc,
1156 }) => {
1157 let obj_init = self.parse_obj_inits()?;
1159
1160 let range = if let Some(&TokenWithLoc {
1161 token: Token::RBrace,
1162 loc: rbrace_loc,
1163 }) = self.tokenizer.peek()?
1164 {
1165 self.tokenizer.next()?;
1166
1167 loc.surrounding(rbrace_loc)
1168 } else {
1169 return Err(SyntaxError::from_location(self.tokenizer.location())
1170 .with_message(format!("Unexpected token, expected RBRACE",))
1171 .into());
1172 };
1173
1174 let obj_init_len = obj_init.len();
1175 debug_assert!(obj_init_len % 2 == 0);
1176
1177 let mut init_asts = Vec::new();
1178
1179 let (compiled_children, children_ast): (Vec<_>, Vec<_>) =
1180 obj_init.into_iter().unzip();
1181
1182 let mut children_ast_iter = children_ast.into_iter();
1183 while let Some(val_ast) = children_ast_iter.next() {
1185 let key_ast = children_ast_iter.next().unwrap();
1186
1187 let range = key_ast.range().surrounding(val_ast.range());
1188
1189 init_asts.push(AstNode::new(
1190 ObjInit {
1191 key: key_ast,
1192 value: val_ast,
1193 },
1194 range,
1195 ));
1196 }
1197
1198 let new_ast = AstNode::new(
1199 Primary::ObjectInit(AstNode::new(ObjInits { inits: init_asts }, range)),
1200 range,
1201 );
1202
1203 Ok((
1204 CompiledProg::from_children_w_bytecode(
1205 compiled_children,
1206 vec![ByteCode::MkDict(obj_init_len as u32 / 2)],
1207 |vals| {
1208 let mut obj_map = HashMap::new();
1209 for i in (0..vals.len()).step_by(2) {
1210 let key = if let CelValue::String(ref k) = vals[i + 1] {
1211 k
1212 } else {
1213 return CelValue::from_err(CelError::value(
1214 "Only strings can be object keys",
1215 ));
1216 };
1217
1218 obj_map.insert(key.clone(), vals[i].clone());
1219 }
1220
1221 obj_map.into()
1222 },
1223 ),
1224 new_ast,
1225 ))
1226 }
1227 Some(TokenWithLoc {
1228 token: Token::UIntLit(val),
1229 loc,
1230 }) => Ok((
1231 CompiledProg::with_const(val.into()),
1232 AstNode::new(Primary::Literal(LiteralsAndKeywords::UnsignedLit(val)), loc),
1233 )),
1234 Some(TokenWithLoc {
1235 token: Token::IntLit(val),
1236 loc,
1237 }) => Ok((
1238 CompiledProg::with_const((val as i64).into()),
1239 AstNode::new(
1240 Primary::Literal(LiteralsAndKeywords::IntegerLit(val as i64)),
1241 loc,
1242 ),
1243 )),
1244 Some(TokenWithLoc {
1245 token: Token::FloatLit(val),
1246 loc,
1247 }) => Ok((
1248 CompiledProg::with_const((val).into()),
1249 AstNode::new(Primary::Literal(LiteralsAndKeywords::FloatingLit(val)), loc),
1250 )),
1251 Some(TokenWithLoc {
1252 token: Token::StringLit(val),
1253 loc,
1254 }) => Ok((
1255 CompiledProg::with_const(val.clone().into()),
1256 AstNode::new(
1257 Primary::Literal(LiteralsAndKeywords::StringLit(val.clone())),
1258 loc,
1259 ),
1260 )),
1261 Some(TokenWithLoc {
1262 token: Token::ByteStringLit(val),
1263 loc,
1264 }) => Ok((
1265 CompiledProg::with_const(val.clone().into()),
1266 AstNode::new(
1267 Primary::Literal(LiteralsAndKeywords::ByteStringLit(val.into())),
1268 loc,
1269 ),
1270 )),
1271 Some(TokenWithLoc {
1272 token: Token::FStringLit(segments),
1273 loc,
1274 }) => {
1275 let mut bytecode = Vec::<PreResolvedCodePoint>::new();
1276
1277 for segment in segments.iter() {
1278 match segment {
1279 FStringSegment::Lit(c) => {
1280 bytecode.push(ByteCode::Push(CelValue::String(c.clone())).into())
1281 }
1282 FStringSegment::Expr(e) => {
1283 let mut tok = StringTokenizer::with_input(&e);
1284 let mut comp = CelCompiler::with_tokenizer(&mut tok);
1285
1286 let (e, _) = comp.parse_expression()?;
1287
1288 bytecode.push(
1289 ByteCode::Push(CelValue::ByteCode(
1290 e.into_unresolved_bytecode().resolve(),
1291 ))
1292 .into(),
1293 );
1294 }
1295 }
1296 bytecode.push(ByteCode::Push(CelValue::Ident("string".to_string())).into());
1297 bytecode.push(ByteCode::Call(1).into());
1298 }
1299
1300 bytecode.push(ByteCode::FmtString(segments.len() as u32).into());
1302
1303 Ok((
1304 CompiledProg::with_code_points(bytecode),
1305 AstNode::new(
1306 Primary::Literal(LiteralsAndKeywords::FStringList(segments.clone())),
1307 loc,
1308 ),
1309 ))
1310 }
1311 Some(TokenWithLoc {
1312 token: Token::BoolLit(val),
1313 loc,
1314 }) => Ok((
1315 CompiledProg::with_const(val.into()),
1316 AstNode::new(Primary::Literal(LiteralsAndKeywords::BooleanLit(val)), loc),
1317 )),
1318 Some(TokenWithLoc {
1319 token: Token::Null,
1320 loc,
1321 }) => Ok((
1322 CompiledProg::with_const(CelValue::from_null()),
1323 AstNode::new(Primary::Literal(LiteralsAndKeywords::NullLit), loc),
1324 )),
1325 _ => Err(SyntaxError::from_location(self.tokenizer.location())
1326 .with_message(format!(
1327 "unexpected {:?}! expecting PRIMARY",
1328 self.tokenizer.peek()
1329 ))
1330 .into()),
1331 }
1332 }
1333
1334 fn parse_expression_list(
1335 &mut self,
1336 ending: Token,
1337 ) -> CelResult<Vec<(CompiledProg, AstNode<Expr>)>> {
1338 let mut exprs = Vec::new();
1339
1340 'outer: loop {
1341 match self.tokenizer.peek()?.as_token() {
1342 Some(val) => {
1343 if *val == ending {
1344 break 'outer;
1345 }
1346 }
1347 None => {}
1348 }
1349
1350 let compiled = self.parse_expression()?;
1351 exprs.push(compiled);
1352
1353 match self.tokenizer.peek()?.as_token() {
1354 Some(Token::Comma) => {
1355 self.tokenizer.next()?;
1356 continue;
1357 }
1358 _ => break 'outer,
1359 }
1360 }
1361
1362 Ok(exprs)
1363 }
1364
1365 fn parse_obj_inits(&mut self) -> CelResult<Vec<(CompiledProg, AstNode<Expr>)>> {
1366 let mut inits = Vec::new();
1367
1368 'outer: loop {
1369 if self.tokenizer.peek()?.as_token() == Some(&Token::RBrace) {
1370 break 'outer;
1371 }
1372
1373 let compiled_key = self.parse_expression()?;
1374
1375 let next_token = self.tokenizer.next()?.into_token();
1376 if next_token != Some(Token::Colon) {
1377 return Err(SyntaxError::from_location(self.tokenizer.location())
1378 .with_message(format!("Invalid token: expected ':' got {:?}", next_token))
1379 .into());
1380 }
1381 let compiled_value = self.parse_expression()?;
1383
1384 inits.push(compiled_value);
1385 inits.push(compiled_key);
1386
1387 match self.tokenizer.peek()?.as_token() {
1388 Some(Token::Comma) => {
1389 self.tokenizer.next()?;
1390 continue;
1391 }
1392 _ => break 'outer,
1393 }
1394 }
1395
1396 Ok(inits)
1397 }
1398
1399 #[inline]
1400 fn check_for_const(&self, member_prime_node: CompiledProg) -> CompiledProg {
1401 let mut i = Interpreter::empty();
1402 i.add_bindings(&self.bindings);
1403 let bc = member_prime_node.into_unresolved_bytecode().resolve();
1404 let r = i.run_raw(&bc, true);
1405
1406 match r {
1407 Ok(v) => CompiledProg::with_const(v),
1408 Err(_) => CompiledProg::with_bytecode(bc),
1409 }
1410 }
1411}
1412
#[cfg(test)]
mod test {
    use test_case::test_case;

    use crate::compiler::string_tokenizer::StringTokenizer;

    use super::CelCompiler;

    // Each input below must compile without error; the resulting program is
    // discarded, only successful compilation is checked.
    #[test_case("3+1"; "addition")]
    #[test_case("(1+foo) / 23"; "with literal")]
    #[test_case("(true || false) + 23"; "with boolean")]
    #[test_case("foo.bar"; "member access")]
    #[test_case("foo[3]"; "list access")]
    #[test_case("foo.bar()"; "member call")]
    #[test_case("foo()"; "empty function call")]
    #[test_case("foo(3)"; "function call")]
    #[test_case("1"; "just 1")]
    #[test_case("foo"; "an ident")]
    #[test_case("foo.bar.baz"; "deep member access")]
    #[test_case("--foo"; "double neg")]
    #[test_case("foo || true"; "or")]
    #[test_case("int(foo.bar && foo.baz) + 4 - (8 * 7)"; "complex")]
    #[test_case("true ? 3 : 1"; "ternary")]
    #[test_case("[1, 2, 3 + 3, 4 * 2, \"fish\"]"; "list construction")]
    fn test_parser(input: &str) {
        let mut source = StringTokenizer::with_input(input);
        let compiler = CelCompiler::with_tokenizer(&mut source);

        compiler.compile().unwrap();
    }

    // A stray closing parenthesis must be rejected, and the resulting error
    // must be renderable through `Display` without panicking.
    #[test]
    fn syntax_error() {
        let mut source = StringTokenizer::with_input("3 + 4 ) - 3");
        let compiler = CelCompiler::with_tokenizer(&mut source);

        let e = compiler.compile();

        assert!(e.is_err());
        let _ = e.unwrap_err().to_string();
    }
}