1use chumsky::prelude::*;
45
/// The complete token set for the language, as produced by [`lexer`].
///
/// Payload-carrying variants (`Ident`, `StringLit`, `UnicodeText`, `Comment`)
/// borrow slices of the original source string via the `'src` lifetime, so
/// lexing never copies textual payloads.
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'src> {
    // --- Top-level / block keywords ---
    Config,
    Variables,
    System,
    StartAgent,
    Topic,
    Actions,
    Inputs,
    Outputs,
    Target,
    Reasoning,
    Instructions,
    BeforeReasoning,
    AfterReasoning,
    Messages,
    Welcome,
    Error,
    Connection, Connections, Knowledge,
    Language,

    // --- Variable / field attribute keywords ---
    Mutable,
    Linked,
    Description,
    Source,
    Label,

    IsRequired,
    IsDisplayable,
    IsUsedByPlanner,
    ComplexDataTypeName,
    FilterFromAgent,

    RequireUserConfirmation,
    IncludeInProgressIndicator,
    ProgressIndicatorMessage,

    // --- Built-in data type names ---
    String,
    Number,
    Boolean,
    Object,
    List,
    Date,
    Timestamp,
    Currency,
    Id,
    Datetime,
    Time,
    Integer,
    Long,

    // --- Statement keywords ---
    If,
    Else,
    Run,
    With,
    Set,
    To,
    As,
    Transition,
    Available,
    When,

    // --- Literal keywords (Python-style capitalization) ---
    True,
    False,
    None,

    // --- Operators and punctuation ---
    // `UnicodeText` collects runs of non-ASCII characters that the lexer
    // passes through verbatim (e.g. prose embedded in instructions).
    Eq, Ne, Lt, Gt, Le, Ge, Assign, Is, Not, And, Or, Plus, Minus, Colon, Dot, Comma, At, Pipe, Arrow, ColonPipe, ColonArrow, LParen, RParen, LBracket, RBracket, LBrace, RBrace, ExclBrace, DoubleLBrace, DoubleBrace, Ellipsis, Slash, Question, Exclamation, Dollar, Percent, Star, Ampersand, Semicolon, Backtick, Tilde, Caret, Backslash, Underscore, Apostrophe, UnicodeText(&'src str),

    // A bare identifier (anything `text::ident()` matches that is not a keyword).
    Ident(&'src str),

    // Contents of a double-quoted string, quotes stripped.
    StringLit(&'src str),

    // All numeric literals are stored as f64, including integers.
    NumberLit(f64),

    // Comment text following `#`, leading `#` stripped.
    Comment(&'src str),

    // `\n` survives lexing as its own token so the indentation pass can work.
    Newline,

    // Synthetic tokens inserted by `add_indentation_tokens`, never by `lexer`.
    Indent, Dedent, }
198
199impl std::fmt::Display for Token<'_> {
200 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
201 match self {
202 Token::Config => write!(f, "config"),
203 Token::Variables => write!(f, "variables"),
204 Token::System => write!(f, "system"),
205 Token::StartAgent => write!(f, "start_agent"),
206 Token::Topic => write!(f, "topic"),
207 Token::Actions => write!(f, "actions"),
208 Token::Inputs => write!(f, "inputs"),
209 Token::Outputs => write!(f, "outputs"),
210 Token::Target => write!(f, "target"),
211 Token::Reasoning => write!(f, "reasoning"),
212 Token::Instructions => write!(f, "instructions"),
213 Token::BeforeReasoning => write!(f, "before_reasoning"),
214 Token::AfterReasoning => write!(f, "after_reasoning"),
215 Token::Messages => write!(f, "messages"),
216 Token::Welcome => write!(f, "welcome"),
217 Token::Error => write!(f, "error"),
218 Token::Connection => write!(f, "connection"),
219 Token::Connections => write!(f, "connections"),
220 Token::Knowledge => write!(f, "knowledge"),
221 Token::Language => write!(f, "language"),
222 Token::Mutable => write!(f, "mutable"),
223 Token::Linked => write!(f, "linked"),
224 Token::Description => write!(f, "description"),
225 Token::Source => write!(f, "source"),
226 Token::Label => write!(f, "label"),
227 Token::IsRequired => write!(f, "is_required"),
228 Token::IsDisplayable => write!(f, "is_displayable"),
229 Token::IsUsedByPlanner => write!(f, "is_used_by_planner"),
230 Token::ComplexDataTypeName => write!(f, "complex_data_type_name"),
231 Token::FilterFromAgent => write!(f, "filter_from_agent"),
232 Token::RequireUserConfirmation => write!(f, "require_user_confirmation"),
233 Token::IncludeInProgressIndicator => write!(f, "include_in_progress_indicator"),
234 Token::ProgressIndicatorMessage => write!(f, "progress_indicator_message"),
235 Token::String => write!(f, "string"),
236 Token::Number => write!(f, "number"),
237 Token::Boolean => write!(f, "boolean"),
238 Token::Object => write!(f, "object"),
239 Token::List => write!(f, "list"),
240 Token::Date => write!(f, "date"),
241 Token::Timestamp => write!(f, "timestamp"),
242 Token::Currency => write!(f, "currency"),
243 Token::Id => write!(f, "id"),
244 Token::Datetime => write!(f, "datetime"),
245 Token::Time => write!(f, "time"),
246 Token::Integer => write!(f, "integer"),
247 Token::Long => write!(f, "long"),
248 Token::If => write!(f, "if"),
249 Token::Else => write!(f, "else"),
250 Token::Run => write!(f, "run"),
251 Token::With => write!(f, "with"),
252 Token::Set => write!(f, "set"),
253 Token::To => write!(f, "to"),
254 Token::As => write!(f, "as"),
255 Token::Transition => write!(f, "transition"),
256 Token::Available => write!(f, "available"),
257 Token::When => write!(f, "when"),
258 Token::True => write!(f, "True"),
259 Token::False => write!(f, "False"),
260 Token::None => write!(f, "None"),
261 Token::Eq => write!(f, "=="),
262 Token::Ne => write!(f, "!="),
263 Token::Lt => write!(f, "<"),
264 Token::Gt => write!(f, ">"),
265 Token::Le => write!(f, "<="),
266 Token::Ge => write!(f, ">="),
267 Token::Assign => write!(f, "="),
268 Token::Is => write!(f, "is"),
269 Token::Not => write!(f, "not"),
270 Token::And => write!(f, "and"),
271 Token::Or => write!(f, "or"),
272 Token::Plus => write!(f, "+"),
273 Token::Minus => write!(f, "-"),
274 Token::Colon => write!(f, ":"),
275 Token::Dot => write!(f, "."),
276 Token::Comma => write!(f, ","),
277 Token::At => write!(f, "@"),
278 Token::Pipe => write!(f, "|"),
279 Token::Arrow => write!(f, "->"),
280 Token::ColonPipe => write!(f, ":|"),
281 Token::ColonArrow => write!(f, ":->"),
282 Token::LParen => write!(f, "("),
283 Token::RParen => write!(f, ")"),
284 Token::LBracket => write!(f, "["),
285 Token::RBracket => write!(f, "]"),
286 Token::LBrace => write!(f, "{{"),
287 Token::RBrace => write!(f, "}}"),
288 Token::ExclBrace => write!(f, "{{!"),
289 Token::DoubleLBrace => write!(f, "{{{{"),
290 Token::DoubleBrace => write!(f, "}}}}"),
291 Token::Ellipsis => write!(f, "..."),
292 Token::Slash => write!(f, "/"),
293 Token::Question => write!(f, "?"),
294 Token::Exclamation => write!(f, "!"),
295 Token::Dollar => write!(f, "$"),
296 Token::Percent => write!(f, "%"),
297 Token::Star => write!(f, "*"),
298 Token::Ampersand => write!(f, "&"),
299 Token::Semicolon => write!(f, ";"),
300 Token::Backtick => write!(f, "`"),
301 Token::Tilde => write!(f, "~"),
302 Token::Caret => write!(f, "^"),
303 Token::Backslash => write!(f, "\\"),
304 Token::Underscore => write!(f, "_"),
305 Token::Apostrophe => write!(f, "'"),
306 Token::UnicodeText(s) => write!(f, "{}", s),
307 Token::Ident(s) => write!(f, "{}", s),
308 Token::StringLit(s) => write!(f, "\"{}\"", s),
309 Token::NumberLit(n) => write!(f, "{}", n),
310 Token::Comment(s) => write!(f, "# {}", s),
311 Token::Newline => write!(f, "\\n"),
312 Token::Indent => write!(f, "INDENT"),
313 Token::Dedent => write!(f, "DEDENT"),
314 }
315 }
316}
317
/// Byte-offset span into the source string (chumsky's simple span type).
pub type Span = SimpleSpan<usize>;

/// A value paired with the source span it was lexed/parsed from.
pub type Spanned<T> = (T, Span);
323
/// Builds the chumsky lexer that turns raw source into a flat stream of
/// spanned [`Token`]s.
///
/// Newlines are emitted as `Token::Newline`; indentation is NOT handled here —
/// run the result through [`add_indentation_tokens`] to get `Indent`/`Dedent`.
///
/// The ordering of alternatives inside each `choice` (and between the groups
/// in the final `token` choice) is load-bearing: longer operators must be
/// tried before their prefixes, and keywords before bare identifiers.
pub fn lexer<'src>(
) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
    // `#` followed by everything up to (but not including) end-of-line; the
    // newline itself becomes a separate `Newline` token.
    let comment = just('#')
        .ignore_then(none_of('\n').repeated().to_slice())
        .map(Token::Comment);

    // Double-quoted string, quotes stripped. NOTE(review): no escape
    // sequences are supported (`\"` terminates the literal), and an embedded
    // newline IS accepted inside the quotes — confirm both are intentional.
    let string_lit = just('"')
        .ignore_then(none_of('"').repeated().to_slice())
        .then_ignore(just('"'))
        .map(Token::StringLit);

    // Integer or decimal literal, always stored as f64. The matched slice is
    // digits with an optional `.digits` tail, so `parse()` cannot fail here.
    let number = text::int(10)
        .then(just('.').then(text::digits(10)).or_not())
        .to_slice()
        .map(|s: &str| Token::NumberLit(s.parse().unwrap()));

    // Multi-character operators. Tried before `single_char_ops` so that e.g.
    // "<=" is not lexed as Lt + Assign; within the group, ":->" must precede
    // ":|" (shared ':' prefix) and "{!" must precede "{{"-handling of '{'.
    let multi_char_ops = choice((
        just(":->").to(Token::ColonArrow),
        just(":|").to(Token::ColonPipe),
        just("->").to(Token::Arrow),
        just("...").to(Token::Ellipsis),
        just("==").to(Token::Eq),
        just("!=").to(Token::Ne),
        just("<=").to(Token::Le),
        just(">=").to(Token::Ge),
        just("{!").to(Token::ExclBrace),
        just("{{").to(Token::DoubleLBrace),
        just("}}").to(Token::DoubleBrace),
    ));

    // Single-character operators that are structurally meaningful.
    let single_char_ops = choice((
        just('<').to(Token::Lt),
        just('>').to(Token::Gt),
        just('=').to(Token::Assign),
        just('+').to(Token::Plus),
        just('-').to(Token::Minus),
        just(':').to(Token::Colon),
        just('.').to(Token::Dot),
        just(',').to(Token::Comma),
        just('@').to(Token::At),
        just('|').to(Token::Pipe),
        just('(').to(Token::LParen),
        just(')').to(Token::RParen),
        just('[').to(Token::LBracket),
        just(']').to(Token::RBracket),
        just('{').to(Token::LBrace),
        just('}').to(Token::RBrace),
    ));

    // Punctuation that mostly appears inside free text; lexed individually so
    // instruction prose can round-trip through the token stream.
    let text_punctuation = choice((
        just('/').to(Token::Slash),
        just('?').to(Token::Question),
        just('!').to(Token::Exclamation),
        just('$').to(Token::Dollar),
        just('%').to(Token::Percent),
        just('*').to(Token::Star),
        just('&').to(Token::Ampersand),
        just(';').to(Token::Semicolon),
        just('`').to(Token::Backtick),
        just('~').to(Token::Tilde),
        just('^').to(Token::Caret),
        just('\\').to(Token::Backslash),
        just('_').to(Token::Underscore),
        just('\'').to(Token::Apostrophe),
    ));

    // A maximal run of non-ASCII characters lumped into one token. Because it
    // never matches ASCII it cannot shadow the keyword/ident parsers below,
    // even though it appears earlier in the `token` choice.
    let unicode_text = any()
        .filter(|c: &char| !c.is_ascii())
        .repeated()
        .at_least(1)
        .to_slice()
        .map(Token::UnicodeText);

    // Keyword groups. `text::keyword` only succeeds on a full identifier, so
    // ordering within these groups is irrelevant (e.g. "time" cannot eat the
    // front of "timestamp"). The groups exist only to stay under chumsky's
    // choice-tuple arity limit.
    let block_keywords = choice((
        text::keyword("config").to(Token::Config),
        text::keyword("variables").to(Token::Variables),
        text::keyword("system").to(Token::System),
        text::keyword("start_agent").to(Token::StartAgent),
        text::keyword("topic").to(Token::Topic),
        text::keyword("actions").to(Token::Actions),
        text::keyword("inputs").to(Token::Inputs),
        text::keyword("outputs").to(Token::Outputs),
        text::keyword("target").to(Token::Target),
        text::keyword("reasoning").to(Token::Reasoning),
        text::keyword("instructions").to(Token::Instructions),
        text::keyword("before_reasoning").to(Token::BeforeReasoning),
        text::keyword("after_reasoning").to(Token::AfterReasoning),
        text::keyword("messages").to(Token::Messages),
    ));

    let more_keywords = choice((
        text::keyword("welcome").to(Token::Welcome),
        text::keyword("error").to(Token::Error),
        text::keyword("connection").to(Token::Connection),
        text::keyword("connections").to(Token::Connections),
        text::keyword("knowledge").to(Token::Knowledge),
        text::keyword("language").to(Token::Language),
        text::keyword("mutable").to(Token::Mutable),
        text::keyword("linked").to(Token::Linked),
        text::keyword("description").to(Token::Description),
        text::keyword("source").to(Token::Source),
        text::keyword("label").to(Token::Label),
        text::keyword("is_required").to(Token::IsRequired),
        text::keyword("is_displayable").to(Token::IsDisplayable),
        text::keyword("is_used_by_planner").to(Token::IsUsedByPlanner),
        text::keyword("complex_data_type_name").to(Token::ComplexDataTypeName),
        text::keyword("filter_from_agent").to(Token::FilterFromAgent),
        text::keyword("require_user_confirmation").to(Token::RequireUserConfirmation),
        text::keyword("include_in_progress_indicator").to(Token::IncludeInProgressIndicator),
        text::keyword("progress_indicator_message").to(Token::ProgressIndicatorMessage),
    ));

    let type_keywords = choice((
        text::keyword("string").to(Token::String),
        text::keyword("number").to(Token::Number),
        text::keyword("boolean").to(Token::Boolean),
        text::keyword("object").to(Token::Object),
        text::keyword("list").to(Token::List),
        text::keyword("date").to(Token::Date),
        text::keyword("timestamp").to(Token::Timestamp),
        text::keyword("currency").to(Token::Currency),
        text::keyword("datetime").to(Token::Datetime),
        text::keyword("time").to(Token::Time),
        text::keyword("integer").to(Token::Integer),
        text::keyword("long").to(Token::Long),
        text::keyword("id").to(Token::Id),
    ));

    let stmt_keywords = choice((
        text::keyword("if").to(Token::If),
        text::keyword("else").to(Token::Else),
        text::keyword("run").to(Token::Run),
        text::keyword("with").to(Token::With),
        text::keyword("set").to(Token::Set),
        text::keyword("to").to(Token::To),
        text::keyword("as").to(Token::As),
        text::keyword("transition").to(Token::Transition),
        text::keyword("available").to(Token::Available),
        text::keyword("when").to(Token::When),
    ));

    let lit_op_keywords = choice((
        text::keyword("True").to(Token::True),
        text::keyword("False").to(Token::False),
        text::keyword("None").to(Token::None),
        text::keyword("is").to(Token::Is),
        text::keyword("not").to(Token::Not),
        text::keyword("and").to(Token::And),
        text::keyword("or").to(Token::Or),
    ));

    let keyword =
        choice((block_keywords, more_keywords, type_keywords, stmt_keywords, lit_op_keywords));

    // Must come AFTER `keyword` in the token choice, or every keyword would
    // lex as a plain identifier.
    let ident = text::ident().map(Token::Ident);

    let newline = just('\n').to(Token::Newline);

    // Overall priority: comments and literals, then operators (longest
    // spellings first), then keywords, then identifiers, then newline.
    let token = choice((
        comment,
        string_lit,
        number,
        multi_char_ops,
        single_char_ops,
        text_punctuation,
        unicode_text,
        keyword,
        ident,
        newline,
    ));

    // Spaces/tabs only — '\n' must survive as a `Newline` token so the
    // indentation pass can see line boundaries.
    let horizontal_ws = one_of(" \t").repeated();

    token
        .map_with(|tok, e| (tok, e.span()))
        .padded_by(horizontal_ws)
        .repeated()
        .collect()
}
521
522pub fn add_indentation_tokens<'src>(
530 source: &'src str,
531 tokens: Vec<Spanned<Token<'src>>>,
532) -> Vec<Spanned<Token<'src>>> {
533 let mut result = Vec::with_capacity(tokens.len() * 2);
534 let mut indent_stack: Vec<usize> = vec![0]; let line_indents: Vec<(usize, usize)> = source
538 .lines()
539 .scan(0usize, |pos, line| {
540 let start = *pos;
541 *pos += line.len() + 1; let indent = line.len() - line.trim_start().len();
543 Some((start, indent))
544 })
545 .collect();
546
547 let get_indent_at = |pos: usize| -> usize {
549 for (line_start, indent) in line_indents.iter().rev() {
550 if pos >= *line_start {
551 return *indent;
552 }
553 }
554 0
555 };
556
557 let mut i = 0;
558 while i < tokens.len() {
559 let (tok, span) = &tokens[i];
560
561 if matches!(tok, Token::Newline) {
562 result.push((tok.clone(), *span));
563
564 let mut next_idx = i + 1;
566 while next_idx < tokens.len() {
567 match &tokens[next_idx].0 {
568 Token::Comment(_) => {
569 result.push(tokens[next_idx].clone());
571 next_idx += 1;
572 }
573 Token::Newline => {
574 result.push(tokens[next_idx].clone());
576 next_idx += 1;
577 }
578 _ => break,
579 }
580 }
581
582 if next_idx < tokens.len() {
583 let next_span = &tokens[next_idx].1;
584 let new_indent = get_indent_at(next_span.start);
585 let current_indent = *indent_stack.last().unwrap_or(&0);
586
587 if new_indent > current_indent {
588 indent_stack.push(new_indent);
591 result.push((Token::Indent, Span::new((), next_span.start..next_span.start)));
592 } else if new_indent < current_indent {
593 while indent_stack.len() > 1 && *indent_stack.last().unwrap() > new_indent {
596 indent_stack.pop();
597 result
598 .push((Token::Dedent, Span::new((), next_span.start..next_span.start)));
599 }
600 }
603 }
605 i = next_idx;
606 } else {
607 result.push((tok.clone(), *span));
608 i += 1;
609 }
610 }
611
612 let eof_pos = source.len();
614 while indent_stack.len() > 1 {
615 indent_stack.pop();
616 result.push((Token::Dedent, Span::new((), eof_pos..eof_pos)));
617 }
618
619 result
620}
621
622pub fn lex_with_indentation<'src>(
624 source: &'src str,
625) -> Result<Vec<Spanned<Token<'src>>>, Vec<Rich<'src, char, Span>>> {
626 let tokens = lexer().parse(source).into_result()?;
627 Ok(add_indentation_tokens(source, tokens))
628}
629
#[cfg(test)]
mod tests {
    use super::*;

    // Keyword, colon operator, and identifier on one line.
    #[test]
    fn test_basic_tokens() {
        let input = "config: agent_name";
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(tokens, vec![Token::Config, Token::Colon, Token::Ident("agent_name"),]);
    }

    // String literal contents are captured with the surrounding quotes stripped.
    #[test]
    fn test_string_literal() {
        let input = r#""hello world""#;
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(tokens, vec![Token::StringLit("hello world")]);
    }

    // `@variables.x` lexes as At + the `variables` keyword + Dot + ident.
    #[test]
    fn test_reference_tokens() {
        let input = "@variables.user_id";
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(
            tokens,
            vec![
                Token::At,
                Token::Variables,
                Token::Dot,
                Token::Ident("user_id"),
            ]
        );
    }

    // Multi-char operators win over their single-char prefixes ("<=" vs "<").
    #[test]
    fn test_operators() {
        let input = "== != < > <= >= = + -";
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(
            tokens,
            vec![
                Token::Eq,
                Token::Ne,
                Token::Lt,
                Token::Gt,
                Token::Le,
                Token::Ge,
                Token::Assign,
                Token::Plus,
                Token::Minus,
            ]
        );
    }

    // "..." lexes as a single Ellipsis, not three Dots.
    #[test]
    fn test_ellipsis() {
        let input = "with value=...";
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(
            tokens,
            vec![
                Token::With,
                Token::Ident("value"),
                Token::Assign,
                Token::Ellipsis
            ]
        );
    }

    // Colon-prefixed compound operators (":|", ":->") vs plain ":".
    #[test]
    fn test_colon_variants() {
        let input = ": :| :->";
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(tokens, vec![Token::Colon, Token::ColonPipe, Token::ColonArrow]);
    }

    // All numeric literals, including integers, are stored as f64.
    #[test]
    fn test_number_literals() {
        let input = "42 3.15 0";
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(
            tokens,
            vec![
                Token::NumberLit(42.0),
                Token::NumberLit(3.15),
                Token::NumberLit(0.0),
            ]
        );
    }

    // "{!" opens an interpolation (ExclBrace); the closer is a plain RBrace.
    #[test]
    fn test_interpolation_brace() {
        let input = "{!@variables.name}";
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(
            tokens,
            vec![
                Token::ExclBrace,
                Token::At,
                Token::Variables,
                Token::Dot,
                Token::Ident("name"),
                Token::RBrace,
            ]
        );
    }

    // End-to-end: indentation pass emits balanced INDENT/DEDENT for two
    // indented blocks separated by a blank line.
    #[test]
    fn test_indentation_tokens() {
        let input = r#"config:
    agent_name: "Test"
    description: "Desc"

topic main:
    description: "Main"
"#;
        let result = lex_with_indentation(input);
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();

        assert!(tokens.contains(&Token::Indent));
        assert!(tokens.contains(&Token::Dedent));

        let indents = tokens.iter().filter(|t| matches!(t, Token::Indent)).count();
        let dedents = tokens.iter().filter(|t| matches!(t, Token::Dedent)).count();

        assert_eq!(indents, dedents, "INDENT/DEDENT should balance");
    }

    // Two nesting levels produce exactly two INDENTs and, via the EOF
    // unwinding loop, two matching DEDENTs.
    #[test]
    fn test_nested_indentation() {
        let input = r#"topic main:
    reasoning:
        instructions: "test"
"#;
        let result = lex_with_indentation(input);
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();

        let indents = tokens.iter().filter(|t| matches!(t, Token::Indent)).count();
        let dedents = tokens.iter().filter(|t| matches!(t, Token::Dedent)).count();
        assert_eq!(indents, 2, "Should have 2 INDENTs");
        assert_eq!(dedents, 2, "Should have 2 DEDENTs");
    }
}