Skip to main content

cobble/parser/
combinators.rs

1use super::tokenizer::{tokenize, Token};
2use crate::ast::*;
3use chumsky::prelude::*;
4
5/// Token parser using chumsky
6pub fn token_parser<'a>(
7) -> impl Parser<'a, &'a [Token], Program, extra::Err<Rich<'a, Token>>> + Clone {
8    recursive(|stmt| {
9        // Expression parser
10        let expr = recursive(|expr| {
11            let array_literal = just(&Token::LBracket)
12                .ignore_then(
13                    expr.clone()
14                        .separated_by(just(&Token::Comma))
15                        .allow_trailing()
16                        .collect(),
17                )
18                .then_ignore(just(&Token::RBracket))
19                .map(Expression::Array);
20
21            let map_entry = choice((
22                select_ref! { Token::String(s) => s.clone() },
23                select_ref! { Token::Ident(s) => s.clone() },
24            ))
25            .then_ignore(just(&Token::Colon))
26            .then(expr.clone());
27
28            let map_literal = just(&Token::LBrace)
29                .ignore_then(
30                    map_entry
31                        .separated_by(just(&Token::Comma))
32                        .allow_trailing()
33                        .collect(),
34                )
35                .then_ignore(just(&Token::RBrace))
36                .map(Expression::Map);
37
38            let atom = choice((
39                select_ref! {
40                    Token::Number(n) => Expression::Number(n.parse().unwrap_or(0.0)),
41                    Token::String(s) => Expression::String(s.clone()),
42                    Token::True_ => Expression::Boolean(true),
43                    Token::False_ => Expression::Boolean(false),
44                    Token::None_ => Expression::None,
45                    Token::Ident(s) => Expression::Identifier(s.clone()),
46                },
47                array_literal,
48                map_literal,
49                // Parenthesized expression
50                just(&Token::LParen)
51                    .ignore_then(expr.clone())
52                    .then_ignore(just(&Token::RParen)),
53            ));
54
55            // Attribute access (e.g., stdlib.event)
56            let postfix = atom.foldl(
57                choice((
58                    just(&Token::Dot)
59                        .ignore_then(select_ref! { Token::Ident(s) => s.clone() })
60                        .map(|attr| (true, Expression::Identifier(attr))), // true = dot access
61                    just(&Token::LBracket)
62                        .ignore_then(expr.clone())
63                        .then_ignore(just(&Token::RBracket))
64                        .map(|index| (false, index)), // false = bracket access
65                ))
66                .repeated(),
67                |base, (is_dot, accessor)| {
68                    if is_dot {
69                        if let Expression::Identifier(attr) = accessor {
70                            Expression::Attribute(Box::new(base), attr)
71                        } else {
72                            unreachable!()
73                        }
74                    } else {
75                        Expression::Subscript(Box::new(base), Box::new(accessor))
76                    }
77                },
78            );
79
80            // Function call
81            let call = postfix
82                .then(
83                    just(&Token::LParen)
84                        .ignore_then(
85                            expr.clone()
86                                .separated_by(just(&Token::Comma))
87                                .allow_trailing()
88                                .collect(),
89                        )
90                        .then_ignore(just(&Token::RParen))
91                        .or_not(),
92                )
93                .map(|(func, args)| {
94                    if let Some(args) = args {
95                        Expression::Call(Box::new(func), args)
96                    } else {
97                        func
98                    }
99                });
100
101            // Binary operations with proper precedence
102            // Unary +/- (high precedence, but lower than call, higher than power)
103            // Allow unary operators before any atom/call/parenthesized expression
104            let unary = recursive(|unary_rec| {
105                choice((
106                    just(&Token::Minus).to(UnaryOp::Neg),
107                    just(&Token::Plus).to(UnaryOp::Pos),
108                ))
109                .then(unary_rec.clone())
110                .map(|(op, expr)| Expression::Unary(op, Box::new(expr)))
111                .or(call.clone())
112            });
113
114            // Highest precedence: ^ (power) - right-associative
115            // Power operator is right-associative: 2^3^2 = 2^(3^2) = 512
116            // We parse left-to-right but fold right-to-left for right-associativity
117            let power = unary
118                .clone()
119                .then(
120                    just(&Token::Caret)
121                        .to(BinaryOp::Pow)
122                        .then(unary.clone())
123                        .repeated()
124                        .collect::<Vec<_>>(),
125                )
126                .map(|(first, rest)| {
127                    if rest.is_empty() {
128                        first
129                    } else {
130                        // Collect all operands: [first, second, third, ...]
131                        let mut all_operands = vec![first];
132                        for (_, operand) in &rest {
133                            all_operands.push(operand.clone());
134                        }
135
136                        // Build right-associative tree by folding from right to left
137                        // For [a, b, c]: a ^ (b ^ c)
138                        let mut result = all_operands.pop().unwrap();
139                        while let Some(operand) = all_operands.pop() {
140                            result = Expression::Binary(
141                                Box::new(operand),
142                                BinaryOp::Pow,
143                                Box::new(result),
144                            );
145                        }
146                        result
147                    }
148                });
149
150            // Second highest: *, /, %
151            let mul_div_mod = power.clone().foldl(
152                choice((
153                    just(&Token::Star).to(BinaryOp::Mul),
154                    just(&Token::Slash).to(BinaryOp::Div),
155                    just(&Token::Percent).to(BinaryOp::Mod),
156                ))
157                .then(power.clone())
158                .repeated(),
159                |left, (op, right)| Expression::Binary(Box::new(left), op, Box::new(right)),
160            );
161
162            // Middle precedence: + -
163            let add_sub = mul_div_mod.clone().foldl(
164                choice((
165                    just(&Token::Plus).to(BinaryOp::Add),
166                    just(&Token::Minus).to(BinaryOp::Sub),
167                ))
168                .then(mul_div_mod.clone())
169                .repeated(),
170                |left, (op, right)| Expression::Binary(Box::new(left), op, Box::new(right)),
171            );
172
173            // Comparisons
174            let comparison = add_sub.clone().foldl(
175                choice((
176                    just(&Token::EqEq).to(BinaryOp::Eq),
177                    just(&Token::NotEq).to(BinaryOp::NotEq),
178                    just(&Token::GtEq).to(BinaryOp::GtEq),
179                    just(&Token::LtEq).to(BinaryOp::LtEq),
180                    just(&Token::Gt).to(BinaryOp::Gt),
181                    just(&Token::Lt).to(BinaryOp::Lt),
182                ))
183                .then(add_sub.clone())
184                .repeated(),
185                |left, (op, right)| Expression::Binary(Box::new(left), op, Box::new(right)),
186            );
187
188            // Not (unary)
189            let not_expr = just(&Token::Not)
190                .repeated()
191                .foldr(comparison.clone(), |_op, expr| {
192                    Expression::Unary(UnaryOp::Not, Box::new(expr))
193                })
194                .or(comparison.clone());
195
196            // And
197            let and_expr = not_expr.clone().foldl(
198                just(&Token::And)
199                    .to(BinaryOp::And)
200                    .then(not_expr.clone())
201                    .repeated(),
202                |left, (op, right)| Expression::Binary(Box::new(left), op, Box::new(right)),
203            );
204
205            // Or (lowest precedence)
206            and_expr.clone().foldl(
207                just(&Token::Or)
208                    .to(BinaryOp::Or)
209                    .then(and_expr.clone())
210                    .repeated(),
211                |left, (op, right)| Expression::Binary(Box::new(left), op, Box::new(right)),
212            )
213        });
214
215        // Block: INDENT statements DEDENT
216        let block = just(&Token::Indent)
217            .ignore_then(stmt.clone().repeated().collect())
218            .then_ignore(just(&Token::Dedent));
219
220        // Minecraft command
221        let minecraft_cmd = select_ref! {
222            Token::MinecraftCommand(s) => Statement::MinecraftCommand(format!("/{}", s))
223        };
224
225        // Import
226        let import = choice((
227            just(&Token::Import)
228                .ignore_then(select_ref! { Token::Ident(s) => s.clone() })
229                .map(|module| {
230                    Statement::Import(Import {
231                        module,
232                        items: vec![],
233                    })
234                }),
235            just(&Token::From)
236                .ignore_then(select_ref! { Token::Ident(s) => s.clone() })
237                .then_ignore(just(&Token::Import))
238                .then(
239                    select_ref! { Token::Ident(s) => s.clone() }
240                        .separated_by(just(&Token::Comma))
241                        .allow_trailing()
242                        .collect(),
243                )
244                .map(|(module, items)| Statement::Import(Import { module, items })),
245        ));
246
247        // Global
248        let global = just(&Token::Global)
249            .ignore_then(
250                select_ref! { Token::Ident(s) => s.clone() }
251                    .separated_by(just(&Token::Comma))
252                    .allow_trailing()
253                    .collect(),
254            )
255            .map(Statement::Global);
256
257        // Assignment
258        let assignment = select_ref! { Token::Ident(s) => s.clone() }
259            .then_ignore(just(&Token::Equals))
260            .then(expr.clone())
261            .map(|(target, value)| Statement::Assignment(Assignment { target, value }));
262
263        // Const assignment
264        let const_assignment = just(&Token::Const)
265            .ignore_then(select_ref! { Token::Ident(s) => s.clone() })
266            .then_ignore(just(&Token::Equals))
267            .then(expr.clone())
268            .map(|(target, value)| Statement::ConstAssignment(ConstAssignment { target, value }));
269
270        // Selector definition: @Name = @selector[...]
271        let selector_def = select_ref! { Token::Ident(s) if s.starts_with('@') => s.clone() }
272            .then_ignore(just(&Token::Equals))
273            .then(select_ref! { Token::Ident(s) if s.starts_with('@') => s.clone() })
274            .map(|(name_with_at, selector)| {
275                // Strip @ from name (e.g., "@Player" -> "Player")
276                let name = name_with_at
277                    .strip_prefix('@')
278                    .unwrap_or(&name_with_at)
279                    .to_string();
280                Statement::SelectorDef(SelectorDef { name, selector })
281            });
282
283        // Entity definition: define @Name = @Selector create { ... } end
284        let entity_def = just(&Token::Define)
285            .ignore_then(select_ref! { Token::Ident(s) if s.starts_with('@') => s.clone() })
286            .then_ignore(just(&Token::Equals))
287            .then(select_ref! { Token::Ident(s) if s.starts_with('@') => s.clone() })
288            .then_ignore(just(&Token::Newline).or_not())
289            .then_ignore(just(&Token::Create))
290            .then(expr.clone())
291            .then_ignore(just(&Token::Newline).or_not())
292            .then_ignore(just(&Token::End))
293            .map(|((name_with_at, selector), nbt)| {
294                // Strip @ from name (e.g., "@Player" -> "Player")
295                let name = name_with_at
296                    .strip_prefix('@')
297                    .unwrap_or(&name_with_at)
298                    .to_string();
299                Statement::EntityDef(EntityDef {
300                    name,
301                    selector,
302                    nbt,
303                })
304            });
305
306        // Create entity statement: create @Name
307        let create_entity = just(&Token::Create)
308            .ignore_then(select_ref! { Token::Ident(s) if s.starts_with('@') => s.clone() })
309            .map(|name_with_at| {
310                let name = name_with_at
311                    .strip_prefix('@')
312                    .unwrap_or(&name_with_at)
313                    .to_string();
314                Statement::CreateEntity(name)
315            });
316
317        // Pass
318        let pass = just(&Token::Pass).to(Statement::Pass);
319
320        // Return
321        let return_stmt = just(&Token::Return)
322            .ignore_then(expr.clone().or_not())
323            .map(Statement::Return);
324
325        // Docstring (string literal as statement)
326        let docstring = select_ref! {
327            Token::String(_s) => Statement::Pass  // Ignore docstrings
328        };
329
330        // Function definition
331        let function = just(&Token::Def)
332            .ignore_then(select_ref! { Token::Ident(s) => s.clone() })
333            .then(
334                just(&Token::LParen)
335                    .ignore_then(
336                        select_ref! { Token::Ident(s) => s.clone() }
337                            .separated_by(just(&Token::Comma))
338                            .allow_trailing()
339                            .collect::<Vec<String>>(),
340                    )
341                    .then_ignore(just(&Token::RParen)),
342            )
343            .then_ignore(just(&Token::Colon))
344            .then_ignore(just(&Token::Newline).or_not())
345            .then(block.clone())
346            .map(|((name, params), body)| {
347                Statement::FunctionDef(FunctionDef {
348                    name,
349                    params: params
350                        .into_iter()
351                        .map(|p| Parameter {
352                            name: p,
353                            default: None,
354                        })
355                        .collect(),
356                    body,
357                    decorators: vec![],
358                })
359            });
360
361        // If/elif/else
362        let if_stmt = just(&Token::If)
363            .ignore_then(expr.clone())
364            .then_ignore(just(&Token::Colon))
365            .then_ignore(just(&Token::Newline).or_not())
366            .then(block.clone())
367            .then(
368                just(&Token::Elif)
369                    .ignore_then(expr.clone())
370                    .then_ignore(just(&Token::Colon))
371                    .then_ignore(just(&Token::Newline).or_not())
372                    .then(block.clone())
373                    .repeated()
374                    .collect(),
375            )
376            .then(
377                just(&Token::Else)
378                    .ignore_then(just(&Token::Colon))
379                    .then_ignore(just(&Token::Newline).or_not())
380                    .ignore_then(block.clone())
381                    .or_not(),
382            )
383            .map(|(((condition, then_branch), elif_branches), else_branch)| {
384                Statement::If(IfStatement {
385                    condition,
386                    then_branch,
387                    elif_branches,
388                    else_branch,
389                })
390            });
391
392        // For loop
393        let for_loop = just(&Token::For)
394            .ignore_then(select_ref! { Token::Ident(s) => s.clone() })
395            .then_ignore(just(&Token::In))
396            .then(expr.clone())
397            .then(just(&Token::By).ignore_then(expr.clone()).or_not())
398            .then_ignore(just(&Token::Colon))
399            .then_ignore(just(&Token::Newline).or_not())
400            .then(block.clone())
401            .map(|(((target, iter), step), body)| {
402                Statement::For(ForLoop {
403                    target,
404                    iter,
405                    step,
406                    body,
407                })
408            });
409
410        // While loop
411        let while_loop = just(&Token::While)
412            .ignore_then(expr.clone())
413            .then_ignore(just(&Token::Colon))
414            .then_ignore(just(&Token::Newline).or_not())
415            .then(block.clone())
416            .map(|(condition, body)| Statement::While(WhileLoop { condition, body }));
417
418        // Match pattern
419        let match_pattern = choice((
420            // Wildcard: _
421            just(&Token::Underscore).to(MatchPattern::Wildcard),
422            // Range: expr to expr
423            select_ref! { Token::Number(n) => n.parse::<i32>().unwrap() }
424                .then(
425                    just(&Token::To)
426                        .ignore_then(select_ref! { Token::Number(n) => n.parse::<i32>().unwrap() })
427                        .or_not(),
428                )
429                .map(|(start, end)| {
430                    if let Some(end) = end {
431                        MatchPattern::Range(start, end)
432                    } else {
433                        MatchPattern::Literal(start)
434                    }
435                }),
436        ));
437
438        // Match case
439        let match_case = just(&Token::Case)
440            .ignore_then(match_pattern)
441            .then_ignore(just(&Token::Colon))
442            .then_ignore(just(&Token::Newline).or_not())
443            .then(block.clone())
444            .map(|(pattern, body)| MatchCase { pattern, body });
445
446        // Match statement
447        let match_stmt = just(&Token::Match)
448            .ignore_then(expr.clone())
449            .then_ignore(just(&Token::Colon))
450            .then_ignore(just(&Token::Newline))
451            .then_ignore(just(&Token::Indent))
452            .then(match_case.repeated().at_least(1).collect())
453            .then_ignore(just(&Token::Dedent))
454            .map(|(value, cases)| Statement::Match(MatchStatement { value, cases }));
455
456        // Execute block modifiers
457        // Helper to parse execute condition (for if/unless modifiers)
458        let exec_condition = any()
459            .filter(|t: &Token| !matches!(t, Token::Colon | Token::Newline | Token::As | Token::At))
460            .repeated()
461            .at_least(1)
462            .collect::<Vec<Token>>()
463            .map(|tokens| {
464                // Convert tokens to string, but be smart about spacing
465                let mut result = String::new();
466                let mut prev_token: Option<&Token> = None;
467
468                for (i, token) in tokens.iter().enumerate() {
469                    let token_str = format!("{}", token);
470
471                    // Determine if we need a space before this token
472                    let need_space = if i == 0 {
473                        false
474                    } else if let Some(prev) = prev_token {
475                        match (prev, token) {
476                            // No space between dots (for "..")
477                            (Token::Dot, Token::Dot) => false,
478                            // No space after dot if followed by number (for "..10")
479                            (Token::Dot, Token::Number(_)) => false,
480                            // Space after dot for other cases like "1.. if"
481                            (Token::Dot, _) => true,
482                            // No space before dots
483                            (_, Token::Dot) => false,
484                            // No space between minus and number when it's a negative number
485                            (Token::Minus, Token::Number(_)) => false,
486                            // Default: add space
487                            _ => true,
488                        }
489                    } else {
490                        true
491                    };
492
493                    if need_space {
494                        result.push(' ');
495                    }
496                    result.push_str(&token_str);
497                    prev_token = Some(token);
498                }
499                result
500            });
501
502        // Helper: parse either an identifier or a macro parameter {name}
503        let selector_or_macro = choice((
504            select_ref! { Token::Ident(s) => s.clone() },
505            just(&Token::LBrace)
506                .ignore_then(select_ref! { Token::Ident(s) => s.clone() })
507                .then_ignore(just(&Token::RBrace))
508                .map(|name| format!("{{{}}}", name)),
509        ));
510
511        let execute_modifier = choice((
512            just(&Token::As)
513                .ignore_then(selector_or_macro)
514                .map(ExecuteModifier::As),
515            just(&Token::At)
516                .ignore_then(selector_or_macro)
517                .map(ExecuteModifier::At),
518            // if/unless in execute blocks:
519            // For now, keep as raw and let transpiler determine if it's Python expression
520            just(&Token::If)
521                .ignore_then(exec_condition)
522                .map(ExecuteModifier::IfRaw),
523            just(&Token::Unless)
524                .ignore_then(exec_condition)
525                .map(ExecuteModifier::UnlessRaw),
526            // positioned <coords>
527            select_ref! { Token::Ident(s) if s == "positioned" => s.clone() }
528                .ignore_then(exec_condition)
529                .map(ExecuteModifier::Positioned),
530            // rotated <rotation>
531            select_ref! { Token::Ident(s) if s == "rotated" => s.clone() }
532                .ignore_then(exec_condition)
533                .map(ExecuteModifier::Rotated),
534            // in <dimension>
535            just(&Token::In)
536                .ignore_then(exec_condition)
537                .map(ExecuteModifier::In),
538            // anchored <eyes|feet>
539            select_ref! { Token::Ident(s) if s == "anchored" => s.clone() }
540                .ignore_then(select_ref! { Token::Ident(s) => s.clone() })
541                .map(ExecuteModifier::Anchored),
542            // align <axes>
543            select_ref! { Token::Ident(s) if s == "align" => s.clone() }
544                .ignore_then(select_ref! { Token::Ident(s) => s.clone() })
545                .map(ExecuteModifier::Align),
546            // store result/success ...
547            select_ref! { Token::Ident(s) if s == "store" => s.clone() }
548                .ignore_then(exec_condition)
549                .map(ExecuteModifier::Store),
550        ));
551
552        // Execute block - support all execute modifiers
553        let execute_block = choice((
554            // asat @s: -> as @s at @s:
555            just(&Token::Asat)
556                .ignore_then(select_ref! { Token::Ident(s) => s.clone() })
557                .try_map(|s, span| {
558                    if s.starts_with('@') {
559                        Ok(s)
560                    } else {
561                        Err(Rich::custom(
562                            span,
563                            format!("Expected selector starting with '@', got '{}'", s),
564                        ))
565                    }
566                })
567                .then_ignore(just(&Token::Colon))
568                .then_ignore(just(&Token::Newline).or_not())
569                .then(block.clone())
570                .map(|(selector, body)| {
571                    Statement::Execute(ExecuteBlock {
572                        modifiers: vec![
573                            ExecuteModifier::As(selector),
574                            ExecuteModifier::At("@s".to_string()),
575                        ],
576                        body,
577                    })
578                }),
579            // Any execute modifier(s) followed by colon - supports positioned, in, etc. as first modifier
580            execute_modifier
581                .then(
582                    execute_modifier
583                        .repeated()
584                        .collect::<Vec<ExecuteModifier>>(),
585                )
586                .then_ignore(just(&Token::Colon))
587                .then_ignore(just(&Token::Newline).or_not())
588                .then(block.clone())
589                .map(|((first, rest), body)| {
590                    let mut modifiers = vec![first];
591                    modifiers.extend(rest);
592                    Statement::Execute(ExecuteBlock { modifiers, body })
593                }),
594        ));
595
596        // Expression statement (for function calls)
597        let expr_stmt = expr.clone().map(Statement::Expression);
598
599        // Simple statement (ends with newline)
600        let simple_stmt = choice((
601            docstring,
602            minecraft_cmd,
603            import,
604            global,
605            return_stmt,
606            pass,
607            selector_def,
608            entity_def,
609            create_entity,
610            const_assignment,
611            assignment,
612            expr_stmt,
613        ))
614        .then_ignore(just(&Token::Newline).or_not());
615
616        // Compound statement (has block)
617        let compound_stmt = choice((
618            function,
619            if_stmt,
620            for_loop,
621            while_loop,
622            match_stmt,
623            execute_block,
624        ));
625
626        choice((compound_stmt, simple_stmt))
627    })
628    .repeated()
629    .collect()
630    .then_ignore(just(&Token::Eof))
631    .map(|statements: Vec<Statement>| Program {
632        imports: statements
633            .iter()
634            .filter_map(|s| {
635                if let Statement::Import(imp) = s {
636                    Some(imp.clone())
637                } else {
638                    None
639                }
640            })
641            .collect(),
642        statements: statements
643            .into_iter()
644            .filter(|s| !matches!(s, Statement::Import(_)))
645            .collect(),
646    })
647}
648
649/// Parse source code into AST
650pub fn parse(source: &str) -> Result<Program, Vec<String>> {
651    let tokens = tokenize(source).map_err(|e| vec![e])?;
652
653    let result = token_parser().parse(&tokens);
654
655    match result.into_result() {
656        Ok(program) => Ok(program),
657        Err(errors) => Err(errors
658            .into_iter()
659            .map(|e| format!("{}", e.reason()))
660            .collect()),
661    }
662}
663
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` and returns the program, failing the test with the
    /// reported errors when parsing does not succeed.
    fn parse_ok(source: &str) -> Program {
        let outcome = parse(source);
        assert!(outcome.is_ok(), "Parse failed: {:?}", outcome.err());
        outcome.unwrap()
    }

    /// A function body with an assignment and a command is one top-level statement.
    #[test]
    fn test_simple_function() {
        let program = parse_ok(
            r#"
def test():
    x = 10
    /say Hello
"#,
        );
        assert_eq!(program.statements.len(), 1);
    }

    /// An `if` with a comparison condition parses inside a function.
    #[test]
    fn test_if_statement() {
        parse_ok(
            r#"
def test():
    x = 5
    if x == 5:
        /say equal
"#,
        );
    }

    /// A `for ... in` loop over a call expression parses inside a function.
    #[test]
    fn test_for_loop() {
        parse_ok(
            r#"
def test():
    for i in range(5):
        /say hello
"#,
        );
    }

    /// Chained `as` / `at` execute modifiers introduce a block.
    #[test]
    fn test_execute_block() {
        parse_ok(
            r#"
def test():
    as @a at @s:
        /particle flame ~ ~ ~
"#,
        );
    }

    /// The `asat` shorthand introduces a block.
    #[test]
    fn test_asat() {
        parse_ok(
            r#"
def test():
    asat @s:
        /say Hello
"#,
        );
    }

    /// A `global` declaration followed by an assignment parses.
    #[test]
    fn test_global() {
        parse_ok(
            r#"
def test():
    global score
    score = 10
"#,
        );
    }

    /// Both import forms are collected into `Program::imports`.
    #[test]
    fn test_import() {
        let program = parse_ok(
            r#"
import stdlib
from stdlib import event
"#,
        );
        assert_eq!(program.imports.len(), 2);
    }
}