1use chumsky::prelude::*;
45
/// All lexical tokens produced by [`lexer`].
///
/// Payload-carrying variants (`Ident`, `StringLit`, `Comment`,
/// `UnicodeText`) borrow slices of the original source, hence the `'src`
/// lifetime. `Indent`/`Dedent` are synthetic and are only inserted by the
/// indentation post-pass, never by the lexer itself.
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'src> {
    // Section / declaration keywords (spellings in the Display impl).
    Config,
    Variables,
    System,
    StartAgent,
    Topic,
    Actions,
    Inputs,
    Outputs,
    Target,
    Reasoning,
    Instructions,
    BeforeReasoning,
    AfterReasoning,
    Messages,
    Welcome,
    Error,
    Connection,
    Connections,
    Knowledge,
    Language,

    // Attribute keywords.
    Mutable,
    Linked,
    Description,
    Source,
    Label,

    // Field-modifier keywords.
    IsRequired,
    IsDisplayable,
    IsUsedByPlanner,
    ComplexDataTypeName,
    FilterFromAgent,

    // Action-configuration keywords.
    RequireUserConfirmation,
    IncludeInProgressIndicator,
    ProgressIndicatorMessage,

    // Built-in type-name keywords.
    String,
    Number,
    Boolean,
    Object,
    List,
    Date,
    Timestamp,
    Currency,
    Id,
    Datetime,
    Time,
    Integer,
    Long,

    // Statement / expression keywords.
    If,
    Else,
    Run,
    With,
    Set,
    To,
    As,
    Transition,
    Available,
    When,

    // Literal keywords (note the capitalised, Python-style spellings).
    True,
    False,
    None,

    // Operators and punctuation.
    Eq,           // ==
    Ne,           // !=
    Lt,           // <
    Gt,           // >
    Le,           // <=
    Ge,           // >=
    Assign,       // =
    Is,           // is
    Not,          // not
    And,          // and
    Or,           // or
    Plus,         // +
    Minus,        // -
    Colon,        // :
    Dot,          // .
    Comma,        // ,
    At,           // @
    Pipe,         // |
    Arrow,        // ->
    ColonPipe,    // :|
    ColonArrow,   // :->
    LParen,       // (
    RParen,       // )
    LBracket,     // [
    RBracket,     // ]
    LBrace,       // {
    RBrace,       // }
    ExclBrace,    // {!
    DoubleLBrace, // {{
    DoubleBrace,  // }}
    Ellipsis,     // ...
    Slash,        // /
    Question,     // ?
    Exclamation,  // !
    Dollar,       // $
    Percent,      // %
    Star,         // *
    Ampersand,    // &
    Semicolon,    // ;
    Backtick,     // `
    Tilde,        // ~
    Caret,        // ^
    Backslash,    // \
    Underscore,   // _
    Apostrophe,   // '
    /// A run of one or more non-ASCII characters, kept verbatim.
    UnicodeText(&'src str),

    /// An identifier that is not one of the keywords above.
    Ident(&'src str),

    /// Contents of a double-quoted string literal (quotes stripped).
    StringLit(&'src str),

    /// Integer or decimal numeric literal, stored as `f64`.
    NumberLit(f64),

    /// Text following a `#` up to end of line.
    Comment(&'src str),

    /// A literal `'\n'` in the source.
    Newline,

    /// Synthetic layout tokens inserted by [`add_indentation_tokens`].
    Indent, Dedent, }
198
199impl std::fmt::Display for Token<'_> {
200 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
201 match self {
202 Token::Config => write!(f, "config"),
203 Token::Variables => write!(f, "variables"),
204 Token::System => write!(f, "system"),
205 Token::StartAgent => write!(f, "start_agent"),
206 Token::Topic => write!(f, "topic"),
207 Token::Actions => write!(f, "actions"),
208 Token::Inputs => write!(f, "inputs"),
209 Token::Outputs => write!(f, "outputs"),
210 Token::Target => write!(f, "target"),
211 Token::Reasoning => write!(f, "reasoning"),
212 Token::Instructions => write!(f, "instructions"),
213 Token::BeforeReasoning => write!(f, "before_reasoning"),
214 Token::AfterReasoning => write!(f, "after_reasoning"),
215 Token::Messages => write!(f, "messages"),
216 Token::Welcome => write!(f, "welcome"),
217 Token::Error => write!(f, "error"),
218 Token::Connection => write!(f, "connection"),
219 Token::Connections => write!(f, "connections"),
220 Token::Knowledge => write!(f, "knowledge"),
221 Token::Language => write!(f, "language"),
222 Token::Mutable => write!(f, "mutable"),
223 Token::Linked => write!(f, "linked"),
224 Token::Description => write!(f, "description"),
225 Token::Source => write!(f, "source"),
226 Token::Label => write!(f, "label"),
227 Token::IsRequired => write!(f, "is_required"),
228 Token::IsDisplayable => write!(f, "is_displayable"),
229 Token::IsUsedByPlanner => write!(f, "is_used_by_planner"),
230 Token::ComplexDataTypeName => write!(f, "complex_data_type_name"),
231 Token::FilterFromAgent => write!(f, "filter_from_agent"),
232 Token::RequireUserConfirmation => write!(f, "require_user_confirmation"),
233 Token::IncludeInProgressIndicator => write!(f, "include_in_progress_indicator"),
234 Token::ProgressIndicatorMessage => write!(f, "progress_indicator_message"),
235 Token::String => write!(f, "string"),
236 Token::Number => write!(f, "number"),
237 Token::Boolean => write!(f, "boolean"),
238 Token::Object => write!(f, "object"),
239 Token::List => write!(f, "list"),
240 Token::Date => write!(f, "date"),
241 Token::Timestamp => write!(f, "timestamp"),
242 Token::Currency => write!(f, "currency"),
243 Token::Id => write!(f, "id"),
244 Token::Datetime => write!(f, "datetime"),
245 Token::Time => write!(f, "time"),
246 Token::Integer => write!(f, "integer"),
247 Token::Long => write!(f, "long"),
248 Token::If => write!(f, "if"),
249 Token::Else => write!(f, "else"),
250 Token::Run => write!(f, "run"),
251 Token::With => write!(f, "with"),
252 Token::Set => write!(f, "set"),
253 Token::To => write!(f, "to"),
254 Token::As => write!(f, "as"),
255 Token::Transition => write!(f, "transition"),
256 Token::Available => write!(f, "available"),
257 Token::When => write!(f, "when"),
258 Token::True => write!(f, "True"),
259 Token::False => write!(f, "False"),
260 Token::None => write!(f, "None"),
261 Token::Eq => write!(f, "=="),
262 Token::Ne => write!(f, "!="),
263 Token::Lt => write!(f, "<"),
264 Token::Gt => write!(f, ">"),
265 Token::Le => write!(f, "<="),
266 Token::Ge => write!(f, ">="),
267 Token::Assign => write!(f, "="),
268 Token::Is => write!(f, "is"),
269 Token::Not => write!(f, "not"),
270 Token::And => write!(f, "and"),
271 Token::Or => write!(f, "or"),
272 Token::Plus => write!(f, "+"),
273 Token::Minus => write!(f, "-"),
274 Token::Colon => write!(f, ":"),
275 Token::Dot => write!(f, "."),
276 Token::Comma => write!(f, ","),
277 Token::At => write!(f, "@"),
278 Token::Pipe => write!(f, "|"),
279 Token::Arrow => write!(f, "->"),
280 Token::ColonPipe => write!(f, ":|"),
281 Token::ColonArrow => write!(f, ":->"),
282 Token::LParen => write!(f, "("),
283 Token::RParen => write!(f, ")"),
284 Token::LBracket => write!(f, "["),
285 Token::RBracket => write!(f, "]"),
286 Token::LBrace => write!(f, "{{"),
287 Token::RBrace => write!(f, "}}"),
288 Token::ExclBrace => write!(f, "{{!"),
289 Token::DoubleLBrace => write!(f, "{{{{"),
290 Token::DoubleBrace => write!(f, "}}}}"),
291 Token::Ellipsis => write!(f, "..."),
292 Token::Slash => write!(f, "/"),
293 Token::Question => write!(f, "?"),
294 Token::Exclamation => write!(f, "!"),
295 Token::Dollar => write!(f, "$"),
296 Token::Percent => write!(f, "%"),
297 Token::Star => write!(f, "*"),
298 Token::Ampersand => write!(f, "&"),
299 Token::Semicolon => write!(f, ";"),
300 Token::Backtick => write!(f, "`"),
301 Token::Tilde => write!(f, "~"),
302 Token::Caret => write!(f, "^"),
303 Token::Backslash => write!(f, "\\"),
304 Token::Underscore => write!(f, "_"),
305 Token::Apostrophe => write!(f, "'"),
306 Token::UnicodeText(s) => write!(f, "{}", s),
307 Token::Ident(s) => write!(f, "{}", s),
308 Token::StringLit(s) => write!(f, "\"{}\"", s),
309 Token::NumberLit(n) => write!(f, "{}", n),
310 Token::Comment(s) => write!(f, "# {}", s),
311 Token::Newline => write!(f, "\\n"),
312 Token::Indent => write!(f, "INDENT"),
313 Token::Dedent => write!(f, "DEDENT"),
314 }
315 }
316}
317
/// Byte-offset span type attached to every token.
pub type Span = SimpleSpan<usize>;

/// A token (or other value) paired with the source span it came from.
pub type Spanned<T> = (T, Span);
323
324pub fn lexer<'src>(
326) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
327 let comment = just('#')
328 .ignore_then(none_of('\n').repeated().to_slice())
329 .map(Token::Comment);
330
331 let string_lit = just('"')
333 .ignore_then(none_of('"').repeated().to_slice())
334 .then_ignore(just('"'))
335 .map(Token::StringLit);
336
337 let number = text::int(10)
339 .then(just('.').then(text::digits(10)).or_not())
340 .to_slice()
341 .map(|s: &str| Token::NumberLit(s.parse().unwrap()));
342
343 let multi_char_ops = choice((
345 just(":->").to(Token::ColonArrow),
346 just(":|").to(Token::ColonPipe),
347 just("->").to(Token::Arrow),
348 just("...").to(Token::Ellipsis),
349 just("==").to(Token::Eq),
350 just("!=").to(Token::Ne),
351 just("<=").to(Token::Le),
352 just(">=").to(Token::Ge),
353 just("{!").to(Token::ExclBrace),
354 just("{{").to(Token::DoubleLBrace),
355 just("}}").to(Token::DoubleBrace),
356 ));
357
358 let single_char_ops = choice((
360 just('<').to(Token::Lt),
361 just('>').to(Token::Gt),
362 just('=').to(Token::Assign),
363 just('+').to(Token::Plus),
364 just('-').to(Token::Minus),
365 just(':').to(Token::Colon),
366 just('.').to(Token::Dot),
367 just(',').to(Token::Comma),
368 just('@').to(Token::At),
369 just('|').to(Token::Pipe),
370 just('(').to(Token::LParen),
371 just(')').to(Token::RParen),
372 just('[').to(Token::LBracket),
373 just(']').to(Token::RBracket),
374 just('{').to(Token::LBrace),
375 just('}').to(Token::RBrace),
376 ));
377
378 let text_punctuation = choice((
380 just('/').to(Token::Slash),
381 just('?').to(Token::Question),
382 just('!').to(Token::Exclamation),
383 just('$').to(Token::Dollar),
384 just('%').to(Token::Percent),
385 just('*').to(Token::Star),
386 just('&').to(Token::Ampersand),
387 just(';').to(Token::Semicolon),
388 just('`').to(Token::Backtick),
389 just('~').to(Token::Tilde),
390 just('^').to(Token::Caret),
391 just('\\').to(Token::Backslash),
392 just('_').to(Token::Underscore),
393 just('\'').to(Token::Apostrophe),
394 ));
395
396 let unicode_text = any()
399 .filter(|c: &char| !c.is_ascii())
400 .repeated()
401 .at_least(1)
402 .to_slice()
403 .map(Token::UnicodeText);
404
405 let ident_or_keyword = text::ident().map(|s: &str| match s {
410 "config" => Token::Config,
412 "variables" => Token::Variables,
413 "system" => Token::System,
414 "start_agent" => Token::StartAgent,
415 "topic" => Token::Topic,
416 "actions" => Token::Actions,
417 "inputs" => Token::Inputs,
418 "outputs" => Token::Outputs,
419 "target" => Token::Target,
420 "reasoning" => Token::Reasoning,
421 "instructions" => Token::Instructions,
422 "before_reasoning" => Token::BeforeReasoning,
423 "after_reasoning" => Token::AfterReasoning,
424 "messages" => Token::Messages,
425 "welcome" => Token::Welcome,
427 "error" => Token::Error,
428 "connection" => Token::Connection,
429 "connections" => Token::Connections,
430 "knowledge" => Token::Knowledge,
431 "language" => Token::Language,
432 "mutable" => Token::Mutable,
433 "linked" => Token::Linked,
434 "description" => Token::Description,
435 "source" => Token::Source,
436 "label" => Token::Label,
437 "is_required" => Token::IsRequired,
438 "is_displayable" => Token::IsDisplayable,
439 "is_used_by_planner" => Token::IsUsedByPlanner,
440 "complex_data_type_name" => Token::ComplexDataTypeName,
441 "filter_from_agent" => Token::FilterFromAgent,
442 "require_user_confirmation" => Token::RequireUserConfirmation,
443 "include_in_progress_indicator" => Token::IncludeInProgressIndicator,
444 "progress_indicator_message" => Token::ProgressIndicatorMessage,
445 "string" => Token::String,
447 "number" => Token::Number,
448 "boolean" => Token::Boolean,
449 "object" => Token::Object,
450 "list" => Token::List,
451 "date" => Token::Date,
452 "timestamp" => Token::Timestamp,
453 "currency" => Token::Currency,
454 "datetime" => Token::Datetime,
455 "time" => Token::Time,
456 "integer" => Token::Integer,
457 "long" => Token::Long,
458 "id" => Token::Id,
459 "if" => Token::If,
461 "else" => Token::Else,
462 "run" => Token::Run,
463 "with" => Token::With,
464 "set" => Token::Set,
465 "to" => Token::To,
466 "as" => Token::As,
467 "transition" => Token::Transition,
468 "available" => Token::Available,
469 "when" => Token::When,
470 "True" => Token::True,
472 "False" => Token::False,
473 "None" => Token::None,
474 "is" => Token::Is,
475 "not" => Token::Not,
476 "and" => Token::And,
477 "or" => Token::Or,
478 _ => Token::Ident(s),
480 });
481
482 let newline = just('\n').to(Token::Newline);
484
485 let token = choice((
487 comment,
488 string_lit,
489 number,
490 multi_char_ops,
491 single_char_ops,
492 text_punctuation,
493 unicode_text,
494 ident_or_keyword,
495 newline,
496 ));
497
498 let horizontal_ws = one_of(" \t").repeated();
500
501 token
502 .map_with(|tok, e| (tok, e.span()))
503 .padded_by(horizontal_ws)
504 .repeated()
505 .collect()
506}
507
508pub fn add_indentation_tokens<'src>(
516 source: &'src str,
517 tokens: Vec<Spanned<Token<'src>>>,
518) -> Vec<Spanned<Token<'src>>> {
519 let mut result = Vec::with_capacity(tokens.len() * 2);
520 let mut indent_stack: Vec<usize> = vec![0]; let line_indents: Vec<(usize, usize)> = source
524 .lines()
525 .scan(0usize, |pos, line| {
526 let start = *pos;
527 *pos += line.len() + 1; let indent = line.len() - line.trim_start().len();
529 Some((start, indent))
530 })
531 .collect();
532
533 let get_indent_at = |pos: usize| -> usize {
535 match line_indents.binary_search_by_key(&pos, |&(start, _)| start) {
536 Ok(i) => line_indents[i].1,
537 Err(0) => 0,
538 Err(i) => line_indents[i - 1].1,
539 }
540 };
541
542 let mut i = 0;
543 while i < tokens.len() {
544 let (tok, span) = &tokens[i];
545
546 if matches!(tok, Token::Newline) {
547 result.push((tok.clone(), *span));
548
549 let mut next_idx = i + 1;
551 while next_idx < tokens.len() {
552 match &tokens[next_idx].0 {
553 Token::Comment(_) => {
554 result.push(tokens[next_idx].clone());
556 next_idx += 1;
557 }
558 Token::Newline => {
559 result.push(tokens[next_idx].clone());
561 next_idx += 1;
562 }
563 _ => break,
564 }
565 }
566
567 if next_idx < tokens.len() {
568 let next_span = &tokens[next_idx].1;
569 let new_indent = get_indent_at(next_span.start);
570 let current_indent = *indent_stack.last().unwrap_or(&0);
571
572 if new_indent > current_indent {
573 indent_stack.push(new_indent);
576 result.push((Token::Indent, Span::new((), next_span.start..next_span.start)));
577 } else if new_indent < current_indent {
578 while indent_stack.len() > 1 && *indent_stack.last().unwrap() > new_indent {
581 indent_stack.pop();
582 result
583 .push((Token::Dedent, Span::new((), next_span.start..next_span.start)));
584 }
585 }
588 }
590 i = next_idx;
591 } else {
592 result.push((tok.clone(), *span));
593 i += 1;
594 }
595 }
596
597 let eof_pos = source.len();
599 while indent_stack.len() > 1 {
600 indent_stack.pop();
601 result.push((Token::Dedent, Span::new((), eof_pos..eof_pos)));
602 }
603
604 result
605}
606
607pub fn lex_with_indentation<'src>(
609 source: &'src str,
610) -> Result<Vec<Spanned<Token<'src>>>, Vec<Rich<'src, char, Span>>> {
611 let tokens = lexer().parse(source).into_result()?;
612 Ok(add_indentation_tokens(source, tokens))
613}
614
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the plain lexer on `input` and returns the tokens with their
    /// spans stripped, failing the test on any lex error.
    fn lexed(input: &str) -> Vec<Token<'_>> {
        let result = lexer().parse(input).into_result();
        assert!(result.is_ok());
        result.unwrap().into_iter().map(|(t, _)| t).collect()
    }

    /// Runs the full pipeline (lexer + indentation pass) and strips spans,
    /// failing the test on any lex error.
    fn lexed_indented(input: &str) -> Vec<Token<'_>> {
        let result = lex_with_indentation(input);
        assert!(result.is_ok());
        result.unwrap().into_iter().map(|(t, _)| t).collect()
    }

    #[test]
    fn test_basic_tokens() {
        assert_eq!(
            lexed("config: agent_name"),
            vec![Token::Config, Token::Colon, Token::Ident("agent_name")]
        );
    }

    #[test]
    fn test_string_literal() {
        assert_eq!(lexed(r#""hello world""#), vec![Token::StringLit("hello world")]);
    }

    #[test]
    fn test_reference_tokens() {
        assert_eq!(
            lexed("@variables.user_id"),
            vec![Token::At, Token::Variables, Token::Dot, Token::Ident("user_id")]
        );
    }

    #[test]
    fn test_operators() {
        assert_eq!(
            lexed("== != < > <= >= = + -"),
            vec![
                Token::Eq,
                Token::Ne,
                Token::Lt,
                Token::Gt,
                Token::Le,
                Token::Ge,
                Token::Assign,
                Token::Plus,
                Token::Minus,
            ]
        );
    }

    #[test]
    fn test_ellipsis() {
        assert_eq!(
            lexed("with value=..."),
            vec![Token::With, Token::Ident("value"), Token::Assign, Token::Ellipsis]
        );
    }

    #[test]
    fn test_colon_variants() {
        assert_eq!(lexed(": :| :->"), vec![Token::Colon, Token::ColonPipe, Token::ColonArrow]);
    }

    #[test]
    fn test_number_literals() {
        assert_eq!(
            lexed("42 3.15 0"),
            vec![Token::NumberLit(42.0), Token::NumberLit(3.15), Token::NumberLit(0.0)]
        );
    }

    #[test]
    fn test_interpolation_brace() {
        assert_eq!(
            lexed("{!@variables.name}"),
            vec![
                Token::ExclBrace,
                Token::At,
                Token::Variables,
                Token::Dot,
                Token::Ident("name"),
                Token::RBrace,
            ]
        );
    }

    #[test]
    fn test_indentation_tokens() {
        let input = r#"config:
    agent_name: "Test"
    description: "Desc"

topic main:
    description: "Main"
"#;
        let tokens = lexed_indented(input);

        assert!(tokens.contains(&Token::Indent));
        assert!(tokens.contains(&Token::Dedent));

        let indents = tokens.iter().filter(|t| matches!(t, Token::Indent)).count();
        let dedents = tokens.iter().filter(|t| matches!(t, Token::Dedent)).count();
        assert_eq!(indents, dedents, "INDENT/DEDENT should balance");
    }

    #[test]
    fn test_nested_indentation() {
        let input = r#"topic main:
    reasoning:
        instructions: "test"
"#;
        let tokens = lexed_indented(input);

        let indents = tokens.iter().filter(|t| matches!(t, Token::Indent)).count();
        let dedents = tokens.iter().filter(|t| matches!(t, Token::Dedent)).count();
        assert_eq!(indents, 2, "Should have 2 INDENTs");
        assert_eq!(dedents, 2, "Should have 2 DEDENTs");
    }
}