Skip to main content

sage_parser/
parser.rs

1//! Parser implementation using chumsky.
2//!
3//! This module transforms a token stream into an AST.
4
5use crate::ast::{
6    AgentDecl, BeliefDecl, BinOp, Block, ElseBranch, EventKind, Expr, FieldInit, FnDecl,
7    HandlerDecl, Literal, Param, Program, Stmt, StringPart, StringTemplate, UnaryOp,
8};
9use chumsky::prelude::*;
10use chumsky::BoxedParser;
11use sage_lexer::{Spanned, Token};
12use sage_types::{Ident, Span, TypeExpr};
13use std::ops::Range;
14use std::sync::Arc;
15
/// Error type produced by every parser in this module.
///
/// This is chumsky's `Simple` error over [`Token`] input. Its spans are
/// `Range<usize>` values built from the `start..end` offsets of each lexer
/// token (see [`parse`]); the same ranges are used to slice the source text,
/// so they are byte offsets into the original source.
pub type ParseError = Simple<Token>;
18
19/// Parse a sequence of tokens into a Program AST.
20///
21/// # Errors
22///
23/// Returns parse errors if the token stream doesn't form a valid program.
24#[must_use]
25#[allow(clippy::needless_pass_by_value)] // Arc<str> is cheap to clone and idiomatic here
26pub fn parse(tokens: &[Spanned], source: Arc<str>) -> (Option<Program>, Vec<ParseError>) {
27    let len = source.len();
28
29    // Convert our Spanned tokens to (Token, Range<usize>) for chumsky
30    let token_spans: Vec<(Token, Range<usize>)> = tokens
31        .iter()
32        .map(|s| (s.token.clone(), s.start..s.end))
33        .collect();
34
35    let stream = chumsky::Stream::from_iter(len..len, token_spans.into_iter());
36
37    let (ast, errors) = program_parser(Arc::clone(&source)).parse_recovery(stream);
38
39    (ast, errors)
40}
41
42// =============================================================================
43// Top-level parsers
44// =============================================================================
45
46/// Parser for a complete program.
47#[allow(clippy::needless_pass_by_value)]
48fn program_parser(source: Arc<str>) -> impl Parser<Token, Program, Error = ParseError> {
49    let src = source.clone();
50    let src2 = source.clone();
51
52    // Top-level declarations with recovery - skip to next agent/fn/run on error
53    let top_level = agent_parser(source.clone())
54        .or(fn_parser(source.clone()))
55        .recover_with(skip_then_retry_until([
56            Token::KwAgent,
57            Token::KwFn,
58            Token::KwRun,
59        ]));
60
61    let run_stmt = just(Token::KwRun)
62        .ignore_then(ident_token_parser(src.clone()))
63        .then_ignore(just(Token::Semicolon));
64
65    top_level.repeated().then(run_stmt).map_with_span(
66        move |(items, run_agent), span: Range<usize>| {
67            let mut agents = Vec::new();
68            let mut functions = Vec::new();
69
70            for item in items {
71                match item {
72                    TopLevel::Agent(a) => agents.push(a),
73                    TopLevel::Function(f) => functions.push(f),
74                }
75            }
76
77            Program {
78                agents,
79                functions,
80                run_agent,
81                span: make_span(&src2, span),
82            }
83        },
84    )
85}
86
/// Helper enum for collecting top-level declarations.
///
/// `agent_parser` and `fn_parser` both produce this type so they can be
/// combined with `or` and repeated; `program_parser` then sorts the items
/// into the `agents` and `functions` lists of the final `Program`.
enum TopLevel {
    /// An `agent Name { ... }` declaration.
    Agent(AgentDecl),
    /// A `fn name(...) -> Type { ... }` declaration.
    Function(FnDecl),
}
92
93// =============================================================================
94// Agent parsers
95// =============================================================================
96
97/// Parser for an agent declaration.
98#[allow(clippy::needless_pass_by_value)]
99fn agent_parser(source: Arc<str>) -> impl Parser<Token, TopLevel, Error = ParseError> {
100    let src = source.clone();
101    let src2 = source.clone();
102    let src3 = source.clone();
103    let src4 = source.clone();
104
105    let belief = just(Token::KwBelief)
106        .ignore_then(ident_token_parser(src.clone()))
107        .then_ignore(just(Token::Colon))
108        .then(type_parser(src.clone()))
109        .map_with_span(move |(name, ty), span: Range<usize>| BeliefDecl {
110            name,
111            ty,
112            span: make_span(&src, span),
113        });
114
115    let handler = just(Token::KwOn)
116        .ignore_then(event_kind_parser(src2.clone()))
117        .then(block_parser(src2.clone()))
118        .map_with_span(move |(event, body), span: Range<usize>| HandlerDecl {
119            event,
120            body,
121            span: make_span(&src2, span),
122        });
123
124    just(Token::KwAgent)
125        .ignore_then(ident_token_parser(src3.clone()))
126        .then_ignore(just(Token::LBrace))
127        .then(belief.repeated())
128        .then(handler.repeated())
129        .then_ignore(just(Token::RBrace))
130        .map_with_span(move |((name, beliefs), handlers), span: Range<usize>| {
131            TopLevel::Agent(AgentDecl {
132                name,
133                beliefs,
134                handlers,
135                span: make_span(&src4, span),
136            })
137        })
138}
139
140/// Parser for event kinds.
141#[allow(clippy::needless_pass_by_value)]
142fn event_kind_parser(source: Arc<str>) -> impl Parser<Token, EventKind, Error = ParseError> {
143    let src = source.clone();
144
145    let start = just(Token::KwStart).to(EventKind::Start);
146    let stop = just(Token::KwStop).to(EventKind::Stop);
147
148    let message = just(Token::KwMessage)
149        .ignore_then(just(Token::LParen))
150        .ignore_then(ident_token_parser(src.clone()))
151        .then_ignore(just(Token::Colon))
152        .then(type_parser(src))
153        .then_ignore(just(Token::RParen))
154        .map(|(param_name, param_ty)| EventKind::Message {
155            param_name,
156            param_ty,
157        });
158
159    start.or(stop).or(message)
160}
161
162// =============================================================================
163// Function parsers
164// =============================================================================
165
166/// Parser for a function declaration.
167#[allow(clippy::needless_pass_by_value)]
168fn fn_parser(source: Arc<str>) -> impl Parser<Token, TopLevel, Error = ParseError> {
169    let src = source.clone();
170    let src2 = source.clone();
171    let src3 = source.clone();
172
173    let param = ident_token_parser(src.clone())
174        .then_ignore(just(Token::Colon))
175        .then(type_parser(src.clone()))
176        .map_with_span(move |(name, ty), span: Range<usize>| Param {
177            name,
178            ty,
179            span: make_span(&src, span),
180        });
181
182    let params = param
183        .separated_by(just(Token::Comma))
184        .allow_trailing()
185        .delimited_by(just(Token::LParen), just(Token::RParen));
186
187    just(Token::KwFn)
188        .ignore_then(ident_token_parser(src2.clone()))
189        .then(params)
190        .then_ignore(just(Token::Arrow))
191        .then(type_parser(src2.clone()))
192        .then(block_parser(src2))
193        .map_with_span(
194            move |(((name, params), return_ty), body), span: Range<usize>| {
195                TopLevel::Function(FnDecl {
196                    name,
197                    params,
198                    return_ty,
199                    body,
200                    span: make_span(&src3, span),
201                })
202            },
203        )
204}
205
206// =============================================================================
207// Statement parsers
208// =============================================================================
209
210/// Parser for a block of statements.
211/// Uses `boxed()` to reduce type complexity and avoid macOS linker symbol length limits.
212#[allow(clippy::needless_pass_by_value)]
213fn block_parser(source: Arc<str>) -> BoxedParser<'static, Token, Block, ParseError> {
214    let src = source.clone();
215
216    recursive(move |block: Recursive<Token, Block, ParseError>| {
217        let src_inner = src.clone();
218        stmt_parser(src.clone(), block)
219            .repeated()
220            .delimited_by(just(Token::LBrace), just(Token::RBrace))
221            .recover_with(nested_delimiters(
222                Token::LBrace,
223                Token::RBrace,
224                [
225                    (Token::LParen, Token::RParen),
226                    (Token::LBracket, Token::RBracket),
227                ],
228                |_span: Range<usize>| vec![],
229            ))
230            .map_with_span(move |stmts, span: Range<usize>| Block {
231                stmts,
232                span: make_span(&src_inner, span),
233            })
234    })
235    .boxed()
236}
237
238/// Parser for statements.
239#[allow(clippy::needless_pass_by_value)]
240fn stmt_parser(
241    source: Arc<str>,
242    block: impl Parser<Token, Block, Error = ParseError> + Clone + 'static,
243) -> impl Parser<Token, Stmt, Error = ParseError> + Clone {
244    let src = source.clone();
245    let src2 = source.clone();
246    let src3 = source.clone();
247    let src4 = source.clone();
248    let src5 = source.clone();
249    let src6 = source.clone();
250    let src7 = source.clone();
251
252    let let_stmt = just(Token::KwLet)
253        .ignore_then(ident_token_parser(src.clone()))
254        .then(
255            just(Token::Colon)
256                .ignore_then(type_parser(src.clone()))
257                .or_not(),
258        )
259        .then_ignore(just(Token::Eq))
260        .then(expr_parser(src.clone()))
261        .then_ignore(just(Token::Semicolon))
262        .map_with_span(move |((name, ty), value), span: Range<usize>| Stmt::Let {
263            name,
264            ty,
265            value,
266            span: make_span(&src, span),
267        });
268
269    let return_stmt = just(Token::KwReturn)
270        .ignore_then(expr_parser(src2.clone()).or_not())
271        .then_ignore(just(Token::Semicolon))
272        .map_with_span(move |value, span: Range<usize>| Stmt::Return {
273            value,
274            span: make_span(&src2, span),
275        });
276
277    let if_stmt = recursive(|if_stmt| {
278        let src_if = src3.clone();
279        let block_clone = block.clone();
280
281        just(Token::KwIf)
282            .ignore_then(expr_parser(src3.clone()))
283            .then(block_clone.clone())
284            .then(
285                just(Token::KwElse)
286                    .ignore_then(
287                        if_stmt
288                            .map(|s| ElseBranch::ElseIf(Box::new(s)))
289                            .or(block_clone.map(ElseBranch::Block)),
290                    )
291                    .or_not(),
292            )
293            .map_with_span(
294                move |((condition, then_block), else_block), span: Range<usize>| Stmt::If {
295                    condition,
296                    then_block,
297                    else_block,
298                    span: make_span(&src_if, span),
299                },
300            )
301    });
302
303    let for_stmt = just(Token::KwFor)
304        .ignore_then(ident_token_parser(src4.clone()))
305        .then_ignore(just(Token::KwIn))
306        .then(expr_parser(src4.clone()))
307        .then(block.clone())
308        .map_with_span(move |((var, iter), body), span: Range<usize>| Stmt::For {
309            var,
310            iter,
311            body,
312            span: make_span(&src4, span),
313        });
314
315    let while_stmt = just(Token::KwWhile)
316        .ignore_then(expr_parser(src7.clone()))
317        .then(block.clone())
318        .map_with_span(move |(condition, body), span: Range<usize>| Stmt::While {
319            condition,
320            body,
321            span: make_span(&src7, span),
322        });
323
324    let assign_stmt = ident_token_parser(src5.clone())
325        .then_ignore(just(Token::Eq))
326        .then(expr_parser(src5.clone()))
327        .then_ignore(just(Token::Semicolon))
328        .map_with_span(move |(name, value), span: Range<usize>| Stmt::Assign {
329            name,
330            value,
331            span: make_span(&src5, span),
332        });
333
334    let expr_stmt = expr_parser(src6.clone())
335        .then_ignore(just(Token::Semicolon))
336        .map_with_span(move |expr, span: Range<usize>| Stmt::Expr {
337            expr,
338            span: make_span(&src6, span),
339        });
340
341    let_stmt
342        .or(return_stmt)
343        .or(if_stmt)
344        .or(for_stmt)
345        .or(while_stmt)
346        .or(assign_stmt)
347        .or(expr_stmt)
348}
349
350// =============================================================================
351// Expression parsers
352// =============================================================================
353
354/// Parser for expressions (with precedence climbing for binary ops).
355/// Uses `boxed()` to reduce type complexity and avoid macOS linker symbol length limits.
356#[allow(clippy::needless_pass_by_value, clippy::too_many_lines)]
357fn expr_parser(source: Arc<str>) -> BoxedParser<'static, Token, Expr, ParseError> {
358    recursive(move |expr: Recursive<Token, Expr, ParseError>| {
359        let src = source.clone();
360
361        let literal = literal_parser(src.clone());
362        let var = var_parser(src.clone());
363
364        let paren = expr
365            .clone()
366            .delimited_by(just(Token::LParen), just(Token::RParen))
367            .map_with_span({
368                let src = src.clone();
369                move |inner, span: Range<usize>| Expr::Paren {
370                    inner: Box::new(inner),
371                    span: make_span(&src, span),
372                }
373            });
374
375        let list = expr
376            .clone()
377            .separated_by(just(Token::Comma))
378            .allow_trailing()
379            .delimited_by(just(Token::LBracket), just(Token::RBracket))
380            .map_with_span({
381                let src = src.clone();
382                move |elements, span: Range<usize>| Expr::List {
383                    elements,
384                    span: make_span(&src, span),
385                }
386            });
387
388        // self.field or self.method(args)
389        let self_access = just(Token::KwSelf)
390            .ignore_then(just(Token::Dot))
391            .ignore_then(ident_token_parser(src.clone()))
392            .then(
393                expr.clone()
394                    .separated_by(just(Token::Comma))
395                    .allow_trailing()
396                    .delimited_by(just(Token::LParen), just(Token::RParen))
397                    .or_not(),
398            )
399            .map_with_span({
400                let src = src.clone();
401                move |(field, args), span: Range<usize>| match args {
402                    Some(args) => Expr::SelfMethodCall {
403                        method: field,
404                        args,
405                        span: make_span(&src, span),
406                    },
407                    None => Expr::SelfField {
408                        field,
409                        span: make_span(&src, span),
410                    },
411                }
412            });
413
414        // infer("template") or infer("template" -> Type)
415        let infer_expr = just(Token::KwInfer)
416            .ignore_then(just(Token::LParen))
417            .ignore_then(string_template_parser(src.clone()))
418            .then(
419                just(Token::Arrow)
420                    .ignore_then(type_parser(src.clone()))
421                    .or_not(),
422            )
423            .then_ignore(just(Token::RParen))
424            .map_with_span({
425                let src = src.clone();
426                move |(template, result_ty), span: Range<usize>| Expr::Infer {
427                    template,
428                    result_ty,
429                    span: make_span(&src, span),
430                }
431            });
432
433        // spawn Agent { field: value, ... }
434        let field_init = ident_token_parser(src.clone())
435            .then_ignore(just(Token::Colon))
436            .then(expr.clone())
437            .map_with_span({
438                let src = src.clone();
439                move |(name, value), span: Range<usize>| FieldInit {
440                    name,
441                    value,
442                    span: make_span(&src, span),
443                }
444            });
445
446        let spawn_expr = just(Token::KwSpawn)
447            .ignore_then(ident_token_parser(src.clone()))
448            .then_ignore(just(Token::LBrace))
449            .then(field_init.separated_by(just(Token::Comma)).allow_trailing())
450            .then_ignore(just(Token::RBrace))
451            .map_with_span({
452                let src = src.clone();
453                move |(agent, fields), span: Range<usize>| Expr::Spawn {
454                    agent,
455                    fields,
456                    span: make_span(&src, span),
457                }
458            });
459
460        // await expr - we need to handle this carefully to avoid left recursion
461        let await_expr = just(Token::KwAwait)
462            .ignore_then(ident_token_parser(src.clone()).map_with_span({
463                let src = src.clone();
464                move |name, span: Range<usize>| Expr::Var {
465                    name,
466                    span: make_span(&src, span),
467                }
468            }))
469            .map_with_span({
470                let src = src.clone();
471                move |handle, span: Range<usize>| Expr::Await {
472                    handle: Box::new(handle),
473                    span: make_span(&src, span),
474                }
475            });
476
477        // send(handle, message)
478        let send_expr = just(Token::KwSend)
479            .ignore_then(just(Token::LParen))
480            .ignore_then(expr.clone())
481            .then_ignore(just(Token::Comma))
482            .then(expr.clone())
483            .then_ignore(just(Token::RParen))
484            .map_with_span({
485                let src = src.clone();
486                move |(handle, message), span: Range<usize>| Expr::Send {
487                    handle: Box::new(handle),
488                    message: Box::new(message),
489                    span: make_span(&src, span),
490                }
491            });
492
493        // emit(value)
494        let emit_expr = just(Token::KwEmit)
495            .ignore_then(just(Token::LParen))
496            .ignore_then(expr.clone())
497            .then_ignore(just(Token::RParen))
498            .map_with_span({
499                let src = src.clone();
500                move |value, span: Range<usize>| Expr::Emit {
501                    value: Box::new(value),
502                    span: make_span(&src, span),
503                }
504            });
505
506        // function call: name(args)
507        let call_expr = ident_token_parser(src.clone())
508            .then(
509                expr.clone()
510                    .separated_by(just(Token::Comma))
511                    .allow_trailing()
512                    .delimited_by(just(Token::LParen), just(Token::RParen)),
513            )
514            .map_with_span({
515                let src = src.clone();
516                move |(name, args), span: Range<usize>| Expr::Call {
517                    name,
518                    args,
519                    span: make_span(&src, span),
520                }
521            });
522
523        // Atom: the base expression without binary ops
524        // Box early to cut type complexity
525        let atom = infer_expr
526            .or(spawn_expr)
527            .or(await_expr)
528            .or(send_expr)
529            .or(emit_expr)
530            .or(self_access)
531            .or(call_expr)
532            .or(list)
533            .or(paren)
534            .or(literal)
535            .or(var)
536            .boxed();
537
538        // Unary expressions
539        let unary = just(Token::Minus)
540            .to(UnaryOp::Neg)
541            .or(just(Token::Bang).to(UnaryOp::Not))
542            .repeated()
543            .then(atom)
544            .foldr(|op, operand| {
545                let span = operand.span().clone();
546                Expr::Unary {
547                    op,
548                    operand: Box::new(operand),
549                    span,
550                }
551            })
552            .boxed();
553
554        // Binary operators with precedence levels
555        // Level 7: * /
556        let mul_div_op = just(Token::Star)
557            .to(BinOp::Mul)
558            .or(just(Token::Slash).to(BinOp::Div));
559
560        let mul_div = unary
561            .clone()
562            .then(mul_div_op.then(unary.clone()).repeated())
563            .foldl({
564                let src = src.clone();
565                move |left, (op, right)| {
566                    let span = make_span(&src, left.span().start..right.span().end);
567                    Expr::Binary {
568                        op,
569                        left: Box::new(left),
570                        right: Box::new(right),
571                        span,
572                    }
573                }
574            })
575            .boxed();
576
577        // Level 6: + -
578        let add_sub_op = just(Token::Plus)
579            .to(BinOp::Add)
580            .or(just(Token::Minus).to(BinOp::Sub));
581
582        let add_sub = mul_div
583            .clone()
584            .then(add_sub_op.then(mul_div).repeated())
585            .foldl({
586                let src = src.clone();
587                move |left, (op, right)| {
588                    let span = make_span(&src, left.span().start..right.span().end);
589                    Expr::Binary {
590                        op,
591                        left: Box::new(left),
592                        right: Box::new(right),
593                        span,
594                    }
595                }
596            })
597            .boxed();
598
599        // Level 5: ++
600        let concat_op = just(Token::PlusPlus).to(BinOp::Concat);
601
602        let concat = add_sub
603            .clone()
604            .then(concat_op.then(add_sub).repeated())
605            .foldl({
606                let src = src.clone();
607                move |left, (op, right)| {
608                    let span = make_span(&src, left.span().start..right.span().end);
609                    Expr::Binary {
610                        op,
611                        left: Box::new(left),
612                        right: Box::new(right),
613                        span,
614                    }
615                }
616            })
617            .boxed();
618
619        // Level 4: < > <= >=
620        let cmp_op = choice((
621            just(Token::Le).to(BinOp::Le),
622            just(Token::Ge).to(BinOp::Ge),
623            just(Token::Lt).to(BinOp::Lt),
624            just(Token::Gt).to(BinOp::Gt),
625        ));
626
627        let comparison = concat
628            .clone()
629            .then(cmp_op.then(concat).repeated())
630            .foldl({
631                let src = src.clone();
632                move |left, (op, right)| {
633                    let span = make_span(&src, left.span().start..right.span().end);
634                    Expr::Binary {
635                        op,
636                        left: Box::new(left),
637                        right: Box::new(right),
638                        span,
639                    }
640                }
641            })
642            .boxed();
643
644        // Level 3: == !=
645        let eq_op = just(Token::EqEq)
646            .to(BinOp::Eq)
647            .or(just(Token::Ne).to(BinOp::Ne));
648
649        let equality = comparison
650            .clone()
651            .then(eq_op.then(comparison).repeated())
652            .foldl({
653                let src = src.clone();
654                move |left, (op, right)| {
655                    let span = make_span(&src, left.span().start..right.span().end);
656                    Expr::Binary {
657                        op,
658                        left: Box::new(left),
659                        right: Box::new(right),
660                        span,
661                    }
662                }
663            })
664            .boxed();
665
666        // Level 2: &&
667        let and_op = just(Token::And).to(BinOp::And);
668
669        let and = equality
670            .clone()
671            .then(and_op.then(equality).repeated())
672            .foldl({
673                let src = src.clone();
674                move |left, (op, right)| {
675                    let span = make_span(&src, left.span().start..right.span().end);
676                    Expr::Binary {
677                        op,
678                        left: Box::new(left),
679                        right: Box::new(right),
680                        span,
681                    }
682                }
683            })
684            .boxed();
685
686        // Level 1: ||
687        let or_op = just(Token::Or).to(BinOp::Or);
688
689        and.clone().then(or_op.then(and).repeated()).foldl({
690            let src = src.clone();
691            move |left, (op, right)| {
692                let span = make_span(&src, left.span().start..right.span().end);
693                Expr::Binary {
694                    op,
695                    left: Box::new(left),
696                    right: Box::new(right),
697                    span,
698                }
699            }
700        })
701    })
702    .boxed()
703}
704
705// =============================================================================
706// Primitive parsers
707// =============================================================================
708
709/// Create a Span from a Range<usize>.
710fn make_span(source: &Arc<str>, range: Range<usize>) -> Span {
711    Span::new(range.start, range.end, Arc::clone(source))
712}
713
714/// Parser for identifier tokens.
715fn ident_token_parser(source: Arc<str>) -> impl Parser<Token, Ident, Error = ParseError> + Clone {
716    filter_map(move |span: Range<usize>, token| match token {
717        Token::Ident => {
718            let text = &source[span.start..span.end];
719            Ok(Ident::new(text.to_string(), make_span(&source, span)))
720        }
721        _ => Err(Simple::expected_input_found(
722            span,
723            vec![Some(Token::Ident)],
724            Some(token),
725        )),
726    })
727}
728
729/// Parser for variable references.
730fn var_parser(source: Arc<str>) -> impl Parser<Token, Expr, Error = ParseError> + Clone {
731    ident_token_parser(source.clone()).map_with_span(move |name, span: Range<usize>| Expr::Var {
732        name,
733        span: make_span(&source, span),
734    })
735}
736
737/// Parser for type expressions.
738fn type_parser(source: Arc<str>) -> impl Parser<Token, TypeExpr, Error = ParseError> + Clone {
739    recursive(move |ty| {
740        let src = source.clone();
741
742        let primitive = choice((
743            just(Token::TyInt).to(TypeExpr::Int),
744            just(Token::TyFloat).to(TypeExpr::Float),
745            just(Token::TyBool).to(TypeExpr::Bool),
746            just(Token::TyString).to(TypeExpr::String),
747            just(Token::TyUnit).to(TypeExpr::Unit),
748        ));
749
750        let list_ty = just(Token::TyList)
751            .ignore_then(just(Token::Lt))
752            .ignore_then(ty.clone())
753            .then_ignore(just(Token::Gt))
754            .map(|inner| TypeExpr::List(Box::new(inner)));
755
756        let option_ty = just(Token::TyOption)
757            .ignore_then(just(Token::Lt))
758            .ignore_then(ty.clone())
759            .then_ignore(just(Token::Gt))
760            .map(|inner| TypeExpr::Option(Box::new(inner)));
761
762        let inferred_ty = just(Token::TyInferred)
763            .ignore_then(just(Token::Lt))
764            .ignore_then(ty.clone())
765            .then_ignore(just(Token::Gt))
766            .map(|inner| TypeExpr::Inferred(Box::new(inner)));
767
768        let agent_ty = just(Token::TyAgent)
769            .ignore_then(just(Token::Lt))
770            .ignore_then(ident_token_parser(src.clone()))
771            .then_ignore(just(Token::Gt))
772            .map(TypeExpr::Agent);
773
774        let named_ty = ident_token_parser(src).map(TypeExpr::Named);
775
776        primitive
777            .or(list_ty)
778            .or(option_ty)
779            .or(inferred_ty)
780            .or(agent_ty)
781            .or(named_ty)
782    })
783}
784
785/// Parser for literals.
786fn literal_parser(source: Arc<str>) -> impl Parser<Token, Expr, Error = ParseError> + Clone {
787    let src = source.clone();
788    let src2 = source.clone();
789    let src3 = source.clone();
790    let src4 = source.clone();
791    let src5 = source.clone();
792
793    let int_lit = filter_map(move |span: Range<usize>, token| match token {
794        Token::IntLit => {
795            let text = &src[span.start..span.end];
796            text.parse::<i64>()
797                .map(Literal::Int)
798                .map_err(|_| Simple::custom(span, "invalid integer literal"))
799        }
800        _ => Err(Simple::expected_input_found(
801            span,
802            vec![Some(Token::IntLit)],
803            Some(token),
804        )),
805    })
806    .map_with_span(move |value, span: Range<usize>| Expr::Literal {
807        value,
808        span: make_span(&src2, span),
809    });
810
811    let float_lit = filter_map(move |span: Range<usize>, token| match token {
812        Token::FloatLit => {
813            let text = &src3[span.start..span.end];
814            text.parse::<f64>()
815                .map(Literal::Float)
816                .map_err(|_| Simple::custom(span, "invalid float literal"))
817        }
818        _ => Err(Simple::expected_input_found(
819            span,
820            vec![Some(Token::FloatLit)],
821            Some(token),
822        )),
823    })
824    .map_with_span(move |value, span: Range<usize>| Expr::Literal {
825        value,
826        span: make_span(&src4, span),
827    });
828
829    let src6 = source.clone();
830    let string_lit = filter_map(move |span: Range<usize>, token| match token {
831        Token::StringLit => {
832            let text = &src5[span.start..span.end];
833            let inner = &text[1..text.len() - 1];
834            let parts = parse_string_template(inner, &make_span(&src5, span.clone()));
835            Ok(parts)
836        }
837        _ => Err(Simple::expected_input_found(
838            span,
839            vec![Some(Token::StringLit)],
840            Some(token),
841        )),
842    })
843    .map_with_span(move |parts, span: Range<usize>| {
844        let span = make_span(&src6, span);
845        // If no interpolations, use a simple string literal
846        if parts.len() == 1 {
847            if let StringPart::Literal(s) = &parts[0] {
848                return Expr::Literal {
849                    value: Literal::String(s.clone()),
850                    span,
851                };
852            }
853        }
854        // Otherwise, use StringInterp
855        Expr::StringInterp {
856            template: StringTemplate {
857                parts,
858                span: span.clone(),
859            },
860            span,
861        }
862    });
863
864    let bool_lit = just(Token::KwTrue)
865        .to(Literal::Bool(true))
866        .or(just(Token::KwFalse).to(Literal::Bool(false)))
867        .map_with_span(move |value, _span: Range<usize>| Expr::Literal {
868            value,
869            span: Span::dummy(), // bool literals don't carry source
870        });
871
872    int_lit.or(float_lit).or(string_lit).or(bool_lit)
873}
874
875/// Parser for string templates (handles interpolation).
876fn string_template_parser(
877    source: Arc<str>,
878) -> impl Parser<Token, StringTemplate, Error = ParseError> + Clone {
879    filter_map(move |span: Range<usize>, token| match token {
880        Token::StringLit => {
881            let text = &source[span.start..span.end];
882            let inner = &text[1..text.len() - 1];
883            let parts = parse_string_template(inner, &make_span(&source, span.clone()));
884            Ok(StringTemplate {
885                parts,
886                span: make_span(&source, span),
887            })
888        }
889        _ => Err(Simple::expected_input_found(
890            span,
891            vec![Some(Token::StringLit)],
892            Some(token),
893        )),
894    })
895}
896
897/// Parse a string into template parts, handling `{ident}` interpolations.
898fn parse_string_template(s: &str, span: &Span) -> Vec<StringPart> {
899    let mut parts = Vec::new();
900    let mut current = String::new();
901    let mut chars = s.chars().peekable();
902
903    while let Some(ch) = chars.next() {
904        if ch == '{' {
905            if !current.is_empty() {
906                parts.push(StringPart::Literal(std::mem::take(&mut current)));
907            }
908
909            let mut ident_name = String::new();
910            while let Some(&c) = chars.peek() {
911                if c == '}' {
912                    chars.next();
913                    break;
914                }
915                ident_name.push(c);
916                chars.next();
917            }
918
919            if !ident_name.is_empty() {
920                parts.push(StringPart::Interpolation(Ident::new(
921                    ident_name,
922                    span.clone(),
923                )));
924            }
925        } else if ch == '\\' {
926            if let Some(escaped) = chars.next() {
927                current.push(match escaped {
928                    'n' => '\n',
929                    't' => '\t',
930                    'r' => '\r',
931                    '\\' => '\\',
932                    '"' => '"',
933                    '{' => '{',
934                    '}' => '}',
935                    other => other,
936                });
937            }
938        } else {
939            current.push(ch);
940        }
941    }
942
943    if !current.is_empty() {
944        parts.push(StringPart::Literal(current));
945    }
946
947    if parts.is_empty() {
948        parts.push(StringPart::Literal(String::new()));
949    }
950
951    parts
952}
953
954// =============================================================================
955// Tests
956// =============================================================================
957
#[cfg(test)]
mod tests {
    use super::*;
    use sage_lexer::lex;

    /// Lex and parse `source`, returning the (possibly partial) AST together
    /// with any parse errors. Panics if lexing itself fails.
    fn parse_str(source: &str) -> (Option<Program>, Vec<ParseError>) {
        let lex_result = lex(source).expect("lexing should succeed");
        let source_arc: Arc<str> = Arc::from(source);
        parse(lex_result.tokens(), source_arc)
    }

    /// Parse `source`, asserting that no parse errors were reported, and
    /// return the resulting program.
    #[track_caller]
    fn parse_ok(source: &str) -> Program {
        let (prog, errors) = parse_str(source);
        assert!(errors.is_empty(), "errors: {errors:?}");
        prog.expect("should parse")
    }

    /// A single agent with one handler plus a `run` statement parses.
    #[test]
    fn parse_minimal_program() {
        let source = r#"
            agent Main {
                on start {
                    emit(42);
                }
            }
            run Main;
        "#;

        let prog = parse_ok(source);

        assert_eq!(prog.agents.len(), 1);
        assert_eq!(prog.agents[0].name.name, "Main");
        assert_eq!(prog.run_agent.name, "Main");
    }

    /// Belief declarations are collected in source order.
    #[test]
    fn parse_agent_with_beliefs() {
        let source = r#"
            agent Researcher {
                belief topic: String
                belief max_words: Int

                on start {
                    emit(self.topic);
                }
            }
            run Researcher;
        "#;

        let prog = parse_ok(source);

        assert_eq!(prog.agents[0].beliefs.len(), 2);
        assert_eq!(prog.agents[0].beliefs[0].name.name, "topic");
        assert_eq!(prog.agents[0].beliefs[1].name.name, "max_words");
    }

    /// `start`, `message(..)` and `stop` handlers are all recognised.
    #[test]
    fn parse_multiple_handlers() {
        let source = r#"
            agent Worker {
                on start {
                    print("started");
                }

                on message(msg: String) {
                    print(msg);
                }

                on stop {
                    print("stopped");
                }
            }
            run Worker;
        "#;

        let prog = parse_ok(source);

        assert_eq!(prog.agents[0].handlers.len(), 3);
        assert_eq!(prog.agents[0].handlers[0].event, EventKind::Start);
        assert!(matches!(
            prog.agents[0].handlers[1].event,
            EventKind::Message { .. }
        ));
        assert_eq!(prog.agents[0].handlers[2].event, EventKind::Stop);
    }

    /// Top-level functions are parsed alongside agents.
    #[test]
    fn parse_function() {
        let source = r#"
            fn greet(name: String) -> String {
                return "Hello, " ++ name;
            }

            agent Main {
                on start {
                    emit(greet("World"));
                }
            }
            run Main;
        "#;

        let prog = parse_ok(source);

        assert_eq!(prog.functions.len(), 1);
        assert_eq!(prog.functions[0].name.name, "greet");
        assert_eq!(prog.functions[0].params.len(), 1);
    }

    /// `let` works both with and without a type annotation.
    #[test]
    fn parse_let_statement() {
        let source = r#"
            agent Main {
                on start {
                    let x: Int = 42;
                    let y = "hello";
                    emit(x);
                }
            }
            run Main;
        "#;

        let prog = parse_ok(source);

        let stmts = &prog.agents[0].handlers[0].body.stmts;
        assert!(matches!(stmts[0], Stmt::Let { .. }));
        assert!(matches!(stmts[1], Stmt::Let { .. }));
    }

    /// `if` / `else` produces an `If` statement.
    #[test]
    fn parse_if_statement() {
        let source = r#"
            agent Main {
                on start {
                    if true {
                        emit(1);
                    } else {
                        emit(2);
                    }
                }
            }
            run Main;
        "#;

        let prog = parse_ok(source);

        let stmts = &prog.agents[0].handlers[0].body.stmts;
        assert!(matches!(stmts[0], Stmt::If { .. }));
    }

    /// `for .. in ..` over a list literal produces a `For` statement.
    #[test]
    fn parse_for_loop() {
        let source = r#"
            agent Main {
                on start {
                    for x in [1, 2, 3] {
                        print(x);
                    }
                    emit(0);
                }
            }
            run Main;
        "#;

        let prog = parse_ok(source);

        let stmts = &prog.agents[0].handlers[0].body.stmts;
        assert!(matches!(stmts[0], Stmt::For { .. }));
    }

    /// `spawn` with field initialisers and `await` both parse.
    #[test]
    fn parse_spawn_await() {
        let source = r#"
            agent Worker {
                belief name: String
                on start {
                    emit(self.name);
                }
            }

            agent Main {
                on start {
                    let w = spawn Worker { name: "test" };
                    let result = await w;
                    emit(result);
                }
            }
            run Main;
        "#;

        let _prog = parse_ok(source);
    }

    /// `infer("...")` parses as an expression.
    #[test]
    fn parse_infer() {
        let source = r#"
            agent Main {
                on start {
                    let result = infer("What is 2+2?");
                    emit(result);
                }
            }
            run Main;
        "#;

        let _prog = parse_ok(source);
    }

    /// `2 + 3 * 4` must have `+` at the root, i.e. `*` binds tighter.
    #[test]
    fn parse_binary_precedence() {
        let source = r#"
            agent Main {
                on start {
                    let x = 2 + 3 * 4;
                    emit(x);
                }
            }
            run Main;
        "#;

        let prog = parse_ok(source);

        let stmts = &prog.agents[0].handlers[0].body.stmts;
        // Fail loudly on any shape mismatch instead of passing vacuously.
        match &stmts[0] {
            Stmt::Let {
                value: Expr::Binary { op, .. },
                ..
            } => assert_eq!(*op, BinOp::Add),
            Stmt::Let { .. } => panic!("expected binary expression"),
            _ => panic!("expected let statement"),
        }
    }

    /// A `{name}` inside an `infer` prompt is recorded as an interpolation.
    #[test]
    fn parse_string_interpolation() {
        let source = r#"
            agent Main {
                on start {
                    let name = "World";
                    let msg = infer("Greet {name}");
                    emit(msg);
                }
            }
            run Main;
        "#;

        let prog = parse_ok(source);

        let stmts = &prog.agents[0].handlers[0].body.stmts;
        // Fail loudly on any shape mismatch instead of passing vacuously.
        match &stmts[1] {
            Stmt::Let {
                value: Expr::Infer { template, .. },
                ..
            } => assert!(template.has_interpolations()),
            Stmt::Let { .. } => panic!("expected infer expression"),
            _ => panic!("expected let statement"),
        }
    }

    // =========================================================================
    // Error recovery tests
    // =========================================================================

    /// A malformed agent reports errors but does not prevent a later, valid
    /// agent from being parsed.
    #[test]
    fn recover_from_malformed_agent_continues_to_next() {
        // First agent has syntax error, second is valid
        let source = r#"
            agent Broken {
                belief x
            }

            agent Main {
                on start {
                    emit(42);
                }
            }
            run Main;
        "#;

        let (prog, errors) = parse_str(source);
        // Should have errors from the broken agent
        assert!(!errors.is_empty(), "should have parse errors");
        // But should still produce a program with the valid agent
        let prog = prog.expect("should produce partial AST");
        assert!(prog.agents.iter().any(|a| a.name.name == "Main"));
    }

    /// An unclosed `[` inside a block still yields a partial AST.
    #[test]
    fn recover_from_mismatched_braces_in_block() {
        let source = r#"
            agent Main {
                on start {
                    let x = [1, 2, 3;
                    emit(42);
                }
            }
            run Main;
        "#;

        let (prog, errors) = parse_str(source);
        // Should have errors but still produce an AST
        assert!(!errors.is_empty(), "should have parse errors");
        assert!(prog.is_some(), "should produce partial AST despite errors");
    }

    /// Several malformed agents in one file still result in reported errors.
    #[test]
    fn recover_multiple_errors_reported() {
        // Multiple errors in different places
        let source = r#"
            agent A {
                belief
            }

            agent B {
                belief
            }

            agent Main {
                on start {
                    emit(42);
                }
            }
            run Main;
        "#;

        let (_prog, errors) = parse_str(source);
        // Should report at least one error from the malformed agents
        assert!(!errors.is_empty(), "should report errors");
        // Recovery allows parsing to continue even with errors
    }
}