1use chumsky::prelude::*;
45
/// The complete token set for the language, as produced by [`lexer`].
///
/// Payload-carrying variants (`Ident`, `StringLit`, `UnicodeText`, `Comment`)
/// borrow slices of the original source string via the `'src` lifetime, so
/// lexing never copies textual payloads.
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'src> {
    // --- Top-level / block keywords ---
    Config,
    Variables,
    System,
    StartAgent,
    Topic,
    Actions,
    Inputs,
    Outputs,
    Target,
    Reasoning,
    Instructions,
    BeforeReasoning,
    AfterReasoning,
    Messages,
    Welcome,
    Error,
    Connection, Connections, Knowledge,
    Language,

    // --- Variable / field attribute keywords ---
    Mutable,
    Linked,
    Description,
    Source,
    Label,

    IsRequired,
    IsDisplayable,
    IsUsedByPlanner,
    ComplexDataTypeName,
    FilterFromAgent,

    RequireUserConfirmation,
    IncludeInProgressIndicator,
    ProgressIndicatorMessage,

    // --- Built-in data type names ---
    String,
    Number,
    Boolean,
    Object,
    List,
    Date,
    Timestamp,
    Currency,
    Id,
    Datetime,
    Time,
    Integer,
    Long,

    // --- Statement keywords ---
    If,
    Else,
    Run,
    With,
    Set,
    To,
    As,
    Transition,
    Available,
    When,

    // --- Literal keywords (Python-style capitalization) ---
    True,
    False,
    None,

    // --- Operators and punctuation ---
    // `UnicodeText` collects runs of non-ASCII characters that the lexer
    // passes through verbatim (e.g. prose embedded in instructions).
    Eq, Ne, Lt, Gt, Le, Ge, Assign, Is, Not, And, Or, Plus, Minus, Colon, Dot, Comma, At, Pipe, Arrow, ColonPipe, ColonArrow, LParen, RParen, LBracket, RBracket, LBrace, RBrace, ExclBrace, DoubleLBrace, DoubleBrace, Ellipsis, Slash, Question, Exclamation, Dollar, Percent, Star, Ampersand, Semicolon, Backtick, Tilde, Caret, Backslash, Underscore, Apostrophe, UnicodeText(&'src str),

    // A bare identifier (anything `text::ident()` matches that is not a keyword).
    Ident(&'src str),

    // Contents of a double-quoted string, quotes stripped.
    StringLit(&'src str),

    // All numeric literals are stored as f64, including integers.
    NumberLit(f64),

    // Comment text following `#`, leading `#` stripped.
    Comment(&'src str),

    // `\n` survives lexing as its own token so the indentation pass can work.
    Newline,

    // Synthetic tokens inserted by `add_indentation_tokens`, never by `lexer`.
    Indent, Dedent, }
198
199impl std::fmt::Display for Token<'_> {
200 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
201 match self {
202 Token::Config => write!(f, "config"),
203 Token::Variables => write!(f, "variables"),
204 Token::System => write!(f, "system"),
205 Token::StartAgent => write!(f, "start_agent"),
206 Token::Topic => write!(f, "topic"),
207 Token::Actions => write!(f, "actions"),
208 Token::Inputs => write!(f, "inputs"),
209 Token::Outputs => write!(f, "outputs"),
210 Token::Target => write!(f, "target"),
211 Token::Reasoning => write!(f, "reasoning"),
212 Token::Instructions => write!(f, "instructions"),
213 Token::BeforeReasoning => write!(f, "before_reasoning"),
214 Token::AfterReasoning => write!(f, "after_reasoning"),
215 Token::Messages => write!(f, "messages"),
216 Token::Welcome => write!(f, "welcome"),
217 Token::Error => write!(f, "error"),
218 Token::Connection => write!(f, "connection"),
219 Token::Connections => write!(f, "connections"),
220 Token::Knowledge => write!(f, "knowledge"),
221 Token::Language => write!(f, "language"),
222 Token::Mutable => write!(f, "mutable"),
223 Token::Linked => write!(f, "linked"),
224 Token::Description => write!(f, "description"),
225 Token::Source => write!(f, "source"),
226 Token::Label => write!(f, "label"),
227 Token::IsRequired => write!(f, "is_required"),
228 Token::IsDisplayable => write!(f, "is_displayable"),
229 Token::IsUsedByPlanner => write!(f, "is_used_by_planner"),
230 Token::ComplexDataTypeName => write!(f, "complex_data_type_name"),
231 Token::FilterFromAgent => write!(f, "filter_from_agent"),
232 Token::RequireUserConfirmation => write!(f, "require_user_confirmation"),
233 Token::IncludeInProgressIndicator => write!(f, "include_in_progress_indicator"),
234 Token::ProgressIndicatorMessage => write!(f, "progress_indicator_message"),
235 Token::String => write!(f, "string"),
236 Token::Number => write!(f, "number"),
237 Token::Boolean => write!(f, "boolean"),
238 Token::Object => write!(f, "object"),
239 Token::List => write!(f, "list"),
240 Token::Date => write!(f, "date"),
241 Token::Timestamp => write!(f, "timestamp"),
242 Token::Currency => write!(f, "currency"),
243 Token::Id => write!(f, "id"),
244 Token::Datetime => write!(f, "datetime"),
245 Token::Time => write!(f, "time"),
246 Token::Integer => write!(f, "integer"),
247 Token::Long => write!(f, "long"),
248 Token::If => write!(f, "if"),
249 Token::Else => write!(f, "else"),
250 Token::Run => write!(f, "run"),
251 Token::With => write!(f, "with"),
252 Token::Set => write!(f, "set"),
253 Token::To => write!(f, "to"),
254 Token::As => write!(f, "as"),
255 Token::Transition => write!(f, "transition"),
256 Token::Available => write!(f, "available"),
257 Token::When => write!(f, "when"),
258 Token::True => write!(f, "True"),
259 Token::False => write!(f, "False"),
260 Token::None => write!(f, "None"),
261 Token::Eq => write!(f, "=="),
262 Token::Ne => write!(f, "!="),
263 Token::Lt => write!(f, "<"),
264 Token::Gt => write!(f, ">"),
265 Token::Le => write!(f, "<="),
266 Token::Ge => write!(f, ">="),
267 Token::Assign => write!(f, "="),
268 Token::Is => write!(f, "is"),
269 Token::Not => write!(f, "not"),
270 Token::And => write!(f, "and"),
271 Token::Or => write!(f, "or"),
272 Token::Plus => write!(f, "+"),
273 Token::Minus => write!(f, "-"),
274 Token::Colon => write!(f, ":"),
275 Token::Dot => write!(f, "."),
276 Token::Comma => write!(f, ","),
277 Token::At => write!(f, "@"),
278 Token::Pipe => write!(f, "|"),
279 Token::Arrow => write!(f, "->"),
280 Token::ColonPipe => write!(f, ":|"),
281 Token::ColonArrow => write!(f, ":->"),
282 Token::LParen => write!(f, "("),
283 Token::RParen => write!(f, ")"),
284 Token::LBracket => write!(f, "["),
285 Token::RBracket => write!(f, "]"),
286 Token::LBrace => write!(f, "{{"),
287 Token::RBrace => write!(f, "}}"),
288 Token::ExclBrace => write!(f, "{{!"),
289 Token::DoubleLBrace => write!(f, "{{{{"),
290 Token::DoubleBrace => write!(f, "}}}}"),
291 Token::Ellipsis => write!(f, "..."),
292 Token::Slash => write!(f, "/"),
293 Token::Question => write!(f, "?"),
294 Token::Exclamation => write!(f, "!"),
295 Token::Dollar => write!(f, "$"),
296 Token::Percent => write!(f, "%"),
297 Token::Star => write!(f, "*"),
298 Token::Ampersand => write!(f, "&"),
299 Token::Semicolon => write!(f, ";"),
300 Token::Backtick => write!(f, "`"),
301 Token::Tilde => write!(f, "~"),
302 Token::Caret => write!(f, "^"),
303 Token::Backslash => write!(f, "\\"),
304 Token::Underscore => write!(f, "_"),
305 Token::Apostrophe => write!(f, "'"),
306 Token::UnicodeText(s) => write!(f, "{}", s),
307 Token::Ident(s) => write!(f, "{}", s),
308 Token::StringLit(s) => write!(f, "\"{}\"", s),
309 Token::NumberLit(n) => write!(f, "{}", n),
310 Token::Comment(s) => write!(f, "# {}", s),
311 Token::Newline => write!(f, "\\n"),
312 Token::Indent => write!(f, "INDENT"),
313 Token::Dedent => write!(f, "DEDENT"),
314 }
315 }
316}
317
/// Byte-offset span into the source string (chumsky's simple span type).
pub type Span = SimpleSpan<usize>;

/// A value paired with the source span it was lexed/parsed from.
pub type Spanned<T> = (T, Span);
323
/// Builds the chumsky lexer that turns raw source into a flat stream of
/// spanned [`Token`]s.
///
/// Newlines are emitted as `Token::Newline`; indentation is NOT handled here —
/// run the result through [`add_indentation_tokens`] to get `Indent`/`Dedent`.
///
/// The ordering of alternatives inside each `choice` (and between the groups
/// in the final `token` choice) is load-bearing: longer operators must be
/// tried before their prefixes, and keywords before bare identifiers.
pub fn lexer<'src>(
) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
    // `#` followed by everything up to (but not including) end-of-line; the
    // newline itself becomes a separate `Newline` token.
    let comment = just('#')
        .ignore_then(none_of('\n').repeated().to_slice())
        .map(Token::Comment);

    // Double-quoted string, quotes stripped. NOTE(review): no escape
    // sequences are supported (`\"` terminates the literal), and an embedded
    // newline IS accepted inside the quotes — confirm both are intentional.
    let string_lit = just('"')
        .ignore_then(none_of('"').repeated().to_slice())
        .then_ignore(just('"'))
        .map(Token::StringLit);

    // Integer or decimal literal, always stored as f64. The matched slice is
    // digits with an optional `.digits` tail, so `parse()` cannot fail here.
    let number = text::int(10)
        .then(just('.').then(text::digits(10)).or_not())
        .to_slice()
        .map(|s: &str| Token::NumberLit(s.parse().unwrap()));

    // Multi-character operators. Tried before `single_char_ops` so that e.g.
    // "<=" is not lexed as Lt + Assign; within the group, ":->" must precede
    // ":|" (shared ':' prefix) and "{!" must precede "{{"-handling of '{'.
    let multi_char_ops = choice((
        just(":->").to(Token::ColonArrow),
        just(":|").to(Token::ColonPipe),
        just("->").to(Token::Arrow),
        just("...").to(Token::Ellipsis),
        just("==").to(Token::Eq),
        just("!=").to(Token::Ne),
        just("<=").to(Token::Le),
        just(">=").to(Token::Ge),
        just("{!").to(Token::ExclBrace),
        just("{{").to(Token::DoubleLBrace),
        just("}}").to(Token::DoubleBrace),
    ));

    // Single-character operators that are structurally meaningful.
    let single_char_ops = choice((
        just('<').to(Token::Lt),
        just('>').to(Token::Gt),
        just('=').to(Token::Assign),
        just('+').to(Token::Plus),
        just('-').to(Token::Minus),
        just(':').to(Token::Colon),
        just('.').to(Token::Dot),
        just(',').to(Token::Comma),
        just('@').to(Token::At),
        just('|').to(Token::Pipe),
        just('(').to(Token::LParen),
        just(')').to(Token::RParen),
        just('[').to(Token::LBracket),
        just(']').to(Token::RBracket),
        just('{').to(Token::LBrace),
        just('}').to(Token::RBrace),
    ));

    // Punctuation that mostly appears inside free text; lexed individually so
    // instruction prose can round-trip through the token stream.
    let text_punctuation = choice((
        just('/').to(Token::Slash),
        just('?').to(Token::Question),
        just('!').to(Token::Exclamation),
        just('$').to(Token::Dollar),
        just('%').to(Token::Percent),
        just('*').to(Token::Star),
        just('&').to(Token::Ampersand),
        just(';').to(Token::Semicolon),
        just('`').to(Token::Backtick),
        just('~').to(Token::Tilde),
        just('^').to(Token::Caret),
        just('\\').to(Token::Backslash),
        just('_').to(Token::Underscore),
        just('\'').to(Token::Apostrophe),
    ));

    // A maximal run of non-ASCII characters lumped into one token. Because it
    // never matches ASCII it cannot shadow the keyword/ident parsers below,
    // even though it appears earlier in the `token` choice.
    let unicode_text = any()
        .filter(|c: &char| !c.is_ascii())
        .repeated()
        .at_least(1)
        .to_slice()
        .map(Token::UnicodeText);

    // Keyword groups. `text::keyword` only succeeds on a full identifier, so
    // ordering within these groups is irrelevant (e.g. "time" cannot eat the
    // front of "timestamp"). The groups exist only to stay under chumsky's
    // choice-tuple arity limit.
    let block_keywords = choice((
        text::keyword("config").to(Token::Config),
        text::keyword("variables").to(Token::Variables),
        text::keyword("system").to(Token::System),
        text::keyword("start_agent").to(Token::StartAgent),
        text::keyword("topic").to(Token::Topic),
        text::keyword("actions").to(Token::Actions),
        text::keyword("inputs").to(Token::Inputs),
        text::keyword("outputs").to(Token::Outputs),
        text::keyword("target").to(Token::Target),
        text::keyword("reasoning").to(Token::Reasoning),
        text::keyword("instructions").to(Token::Instructions),
        text::keyword("before_reasoning").to(Token::BeforeReasoning),
        text::keyword("after_reasoning").to(Token::AfterReasoning),
        text::keyword("messages").to(Token::Messages),
    ));

    let more_keywords = choice((
        text::keyword("welcome").to(Token::Welcome),
        text::keyword("error").to(Token::Error),
        text::keyword("connection").to(Token::Connection),
        text::keyword("connections").to(Token::Connections),
        text::keyword("knowledge").to(Token::Knowledge),
        text::keyword("language").to(Token::Language),
        text::keyword("mutable").to(Token::Mutable),
        text::keyword("linked").to(Token::Linked),
        text::keyword("description").to(Token::Description),
        text::keyword("source").to(Token::Source),
        text::keyword("label").to(Token::Label),
        text::keyword("is_required").to(Token::IsRequired),
        text::keyword("is_displayable").to(Token::IsDisplayable),
        text::keyword("is_used_by_planner").to(Token::IsUsedByPlanner),
        text::keyword("complex_data_type_name").to(Token::ComplexDataTypeName),
        text::keyword("filter_from_agent").to(Token::FilterFromAgent),
        text::keyword("require_user_confirmation").to(Token::RequireUserConfirmation),
        text::keyword("include_in_progress_indicator").to(Token::IncludeInProgressIndicator),
        text::keyword("progress_indicator_message").to(Token::ProgressIndicatorMessage),
    ));

    let type_keywords = choice((
        text::keyword("string").to(Token::String),
        text::keyword("number").to(Token::Number),
        text::keyword("boolean").to(Token::Boolean),
        text::keyword("object").to(Token::Object),
        text::keyword("list").to(Token::List),
        text::keyword("date").to(Token::Date),
        text::keyword("timestamp").to(Token::Timestamp),
        text::keyword("currency").to(Token::Currency),
        text::keyword("datetime").to(Token::Datetime),
        text::keyword("time").to(Token::Time),
        text::keyword("integer").to(Token::Integer),
        text::keyword("long").to(Token::Long),
        text::keyword("id").to(Token::Id),
    ));

    let stmt_keywords = choice((
        text::keyword("if").to(Token::If),
        text::keyword("else").to(Token::Else),
        text::keyword("run").to(Token::Run),
        text::keyword("with").to(Token::With),
        text::keyword("set").to(Token::Set),
        text::keyword("to").to(Token::To),
        text::keyword("as").to(Token::As),
        text::keyword("transition").to(Token::Transition),
        text::keyword("available").to(Token::Available),
        text::keyword("when").to(Token::When),
    ));

    let lit_op_keywords = choice((
        text::keyword("True").to(Token::True),
        text::keyword("False").to(Token::False),
        text::keyword("None").to(Token::None),
        text::keyword("is").to(Token::Is),
        text::keyword("not").to(Token::Not),
        text::keyword("and").to(Token::And),
        text::keyword("or").to(Token::Or),
    ));

    let keyword =
        choice((block_keywords, more_keywords, type_keywords, stmt_keywords, lit_op_keywords));

    // Must come AFTER `keyword` in the token choice, or every keyword would
    // lex as a plain identifier.
    let ident = text::ident().map(Token::Ident);

    let newline = just('\n').to(Token::Newline);

    // Overall priority: comments and literals, then operators (longest
    // spellings first), then keywords, then identifiers, then newline.
    let token = choice((
        comment,
        string_lit,
        number,
        multi_char_ops,
        single_char_ops,
        text_punctuation,
        unicode_text,
        keyword,
        ident,
        newline,
    ));

    // Spaces/tabs only — '\n' must survive as a `Newline` token so the
    // indentation pass can see line boundaries.
    let horizontal_ws = one_of(" \t").repeated();

    token
        .map_with(|tok, e| (tok, e.span()))
        .padded_by(horizontal_ws)
        .repeated()
        .collect()
}
521
522pub fn add_indentation_tokens<'src>(
530 source: &'src str,
531 tokens: Vec<Spanned<Token<'src>>>,
532) -> Vec<Spanned<Token<'src>>> {
533 let mut result = Vec::with_capacity(tokens.len() * 2);
534 let mut indent_stack: Vec<usize> = vec![0]; let line_indents: Vec<(usize, usize)> = source
538 .lines()
539 .scan(0usize, |pos, line| {
540 let start = *pos;
541 *pos += line.len() + 1; let indent = line.len() - line.trim_start().len();
543 Some((start, indent))
544 })
545 .collect();
546
547 let get_indent_at = |pos: usize| -> usize {
549 for (line_start, indent) in line_indents.iter().rev() {
550 if pos >= *line_start {
551 return *indent;
552 }
553 }
554 0
555 };
556
557 let mut i = 0;
558 while i < tokens.len() {
559 let (tok, span) = &tokens[i];
560
561 if matches!(tok, Token::Newline) {
562 result.push((tok.clone(), *span));
563
564 let mut next_idx = i + 1;
566 while next_idx < tokens.len() {
567 match &tokens[next_idx].0 {
568 Token::Comment(_) => {
569 result.push(tokens[next_idx].clone());
571 next_idx += 1;
572 }
573 Token::Newline => {
574 result.push(tokens[next_idx].clone());
576 next_idx += 1;
577 }
578 _ => break,
579 }
580 }
581
582 if next_idx < tokens.len() {
583 let next_span = &tokens[next_idx].1;
584 let new_indent = get_indent_at(next_span.start);
585 let current_indent = *indent_stack.last().unwrap_or(&0);
586
587 if new_indent > current_indent {
588 indent_stack.push(new_indent);
591 result.push((Token::Indent, Span::new((), next_span.start..next_span.start)));
592 } else if new_indent < current_indent {
593 while indent_stack.len() > 1 && *indent_stack.last().unwrap() > new_indent {
596 indent_stack.pop();
597 result
598 .push((Token::Dedent, Span::new((), next_span.start..next_span.start)));
599 }
600 }
603 }
605 i = next_idx;
606 } else {
607 result.push((tok.clone(), *span));
608 i += 1;
609 }
610 }
611
612 let eof_pos = source.len();
614 while indent_stack.len() > 1 {
615 indent_stack.pop();
616 result.push((Token::Dedent, Span::new((), eof_pos..eof_pos)));
617 }
618
619 result
620}
621
622pub fn lex_with_indentation<'src>(
624 source: &'src str,
625) -> Result<Vec<Spanned<Token<'src>>>, Vec<Rich<'src, char, Span>>> {
626 let tokens = lexer().parse(source).into_result()?;
627 Ok(add_indentation_tokens(source, tokens))
628}
629
#[cfg(test)]
mod tests {
    use super::*;

    // Keyword, colon operator, and identifier on one line.
    #[test]
    fn test_basic_tokens() {
        let input = "config: agent_name";
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(tokens, vec![Token::Config, Token::Colon, Token::Ident("agent_name"),]);
    }

    // String literal contents are captured with the surrounding quotes stripped.
    #[test]
    fn test_string_literal() {
        let input = r#""hello world""#;
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(tokens, vec![Token::StringLit("hello world")]);
    }

    // `@variables.x` lexes as At + the `variables` keyword + Dot + ident.
    #[test]
    fn test_reference_tokens() {
        let input = "@variables.user_id";
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(
            tokens,
            vec![
                Token::At,
                Token::Variables,
                Token::Dot,
                Token::Ident("user_id"),
            ]
        );
    }

    // Multi-char operators win over their single-char prefixes ("<=" vs "<").
    #[test]
    fn test_operators() {
        let input = "== != < > <= >= = + -";
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(
            tokens,
            vec![
                Token::Eq,
                Token::Ne,
                Token::Lt,
                Token::Gt,
                Token::Le,
                Token::Ge,
                Token::Assign,
                Token::Plus,
                Token::Minus,
            ]
        );
    }

    // "..." lexes as a single Ellipsis, not three Dots.
    #[test]
    fn test_ellipsis() {
        let input = "with value=...";
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(
            tokens,
            vec![
                Token::With,
                Token::Ident("value"),
                Token::Assign,
                Token::Ellipsis
            ]
        );
    }

    // Colon-prefixed compound operators (":|", ":->") vs plain ":".
    #[test]
    fn test_colon_variants() {
        let input = ": :| :->";
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(tokens, vec![Token::Colon, Token::ColonPipe, Token::ColonArrow]);
    }

    // All numeric literals, including integers, are stored as f64.
    #[test]
    fn test_number_literals() {
        let input = "42 3.15 0";
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(
            tokens,
            vec![
                Token::NumberLit(42.0),
                Token::NumberLit(3.15),
                Token::NumberLit(0.0),
            ]
        );
    }

    // "{!" opens an interpolation (ExclBrace); the closer is a plain RBrace.
    #[test]
    fn test_interpolation_brace() {
        let input = "{!@variables.name}";
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();
        assert_eq!(
            tokens,
            vec![
                Token::ExclBrace,
                Token::At,
                Token::Variables,
                Token::Dot,
                Token::Ident("name"),
                Token::RBrace,
            ]
        );
    }

    // End-to-end: indentation pass emits balanced INDENT/DEDENT for two
    // indented blocks separated by a blank line.
    #[test]
    fn test_indentation_tokens() {
        let input = r#"config:
    agent_name: "Test"
    description: "Desc"

topic main:
    description: "Main"
"#;
        let result = lex_with_indentation(input);
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();

        assert!(tokens.contains(&Token::Indent));
        assert!(tokens.contains(&Token::Dedent));

        let indents = tokens.iter().filter(|t| matches!(t, Token::Indent)).count();
        let dedents = tokens.iter().filter(|t| matches!(t, Token::Dedent)).count();

        assert_eq!(indents, dedents, "INDENT/DEDENT should balance");
    }

    // Two nesting levels produce exactly two INDENTs and, via the EOF
    // unwinding loop, two matching DEDENTs.
    #[test]
    fn test_nested_indentation() {
        let input = r#"topic main:
    reasoning:
        instructions: "test"
"#;
        let result = lex_with_indentation(input);
        assert!(result.is_ok());
        let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect();

        let indents = tokens.iter().filter(|t| matches!(t, Token::Indent)).count();
        let dedents = tokens.iter().filter(|t| matches!(t, Token::Dedent)).count();
        assert_eq!(indents, 2, "Should have 2 INDENTs");
        assert_eq!(dedents, 2, "Should have 2 DEDENTs");
    }
}