// sage_parser/token.rs
1//! Token definitions for the Sage lexer.
2
3use logos::Logos;
4
/// All tokens in the Sage language.
///
/// Produced by the [`logos`]-derived lexer. Whitespace and `//` line
/// comments are skipped outright, so the parser never sees them.
#[derive(Logos, Debug, Clone, PartialEq, Eq, Hash)]
#[logos(skip r"[ \t\r\n]+")]
#[logos(skip r"//[^\n]*")]
pub enum Token {
    // =========================================================================
    // Keywords
    // =========================================================================
    #[token("agent")]
    KwAgent,

    #[token("belief")]
    KwBelief,

    #[token("on")]
    KwOn,

    #[token("start")]
    KwStart,

    #[token("stop")]
    KwStop,

    #[token("message")]
    KwMessage,

    #[token("infer")]
    KwInfer,

    #[token("spawn")]
    KwSpawn,

    #[token("await")]
    KwAwait,

    #[token("send")]
    KwSend,

    #[token("emit")]
    KwEmit,

    #[token("run")]
    KwRun,

    #[token("fn")]
    KwFn,

    #[token("let")]
    KwLet,

    #[token("return")]
    KwReturn,

    #[token("if")]
    KwIf,

    #[token("else")]
    KwElse,

    #[token("for")]
    KwFor,

    #[token("while")]
    KwWhile,

    #[token("loop")]
    KwLoop,

    #[token("break")]
    KwBreak,

    #[token("in")]
    KwIn,

    #[token("self")]
    KwSelf,

    #[token("true")]
    KwTrue,

    #[token("false")]
    KwFalse,

    #[token("mod")]
    KwMod,

    #[token("use")]
    KwUse,

    #[token("pub")]
    KwPub,

    #[token("as")]
    KwAs,

    #[token("super")]
    KwSuper,

    #[token("record")]
    KwRecord,

    #[token("enum")]
    KwEnum,

    #[token("match")]
    KwMatch,

    #[token("const")]
    KwConst,

    #[token("receives")]
    KwReceives,

    // NOTE: "receives" must keep its own token; logos picks the longest
    // match, so "receives" never lexes as KwReceive + Ident.
    #[token("receive")]
    KwReceive,

    #[token("fails")]
    KwFails,

    #[token("try")]
    KwTry,

    #[token("catch")]
    KwCatch,

    #[token("error")]
    KwError,

    #[token("tool")]
    KwTool,

    /// RFC-0012: Test declaration keyword.
    #[token("test")]
    KwTest,

    /// RFC-0012: Mock keyword for LLM mocking.
    #[token("mock")]
    KwMock,

    // =========================================================================
    // Type keywords
    // =========================================================================
    #[token("Int")]
    TyInt,

    #[token("Float")]
    TyFloat,

    #[token("Bool")]
    TyBool,

    #[token("String")]
    TyString,

    #[token("Unit")]
    TyUnit,

    #[token("List")]
    TyList,

    #[token("Option")]
    TyOption,

    #[token("Inferred")]
    TyInferred,

    #[token("Agent")]
    TyAgent,

    #[token("Error")]
    TyError,

    #[token("ErrorKind")]
    TyErrorKind,

    /// Function type keyword: `Fn`
    #[token("Fn")]
    TyFn,

    /// Map type keyword: `Map`
    #[token("Map")]
    TyMap,

    /// Result type keyword: `Result`
    #[token("Result")]
    TyResult,

    // =========================================================================
    // Literals
    // =========================================================================
    /// Integer literal (e.g., `42`, `-7`).
    ///
    /// NOTE(review): the optional leading `-` is folded into the literal,
    /// so input like `x-1` lexes as `Ident IntLit` with no `Minus` token —
    /// confirm the parser accounts for this. The priority bump keeps this
    /// pattern ahead of overlapping candidates for the integer prefix.
    #[regex(r"-?[0-9]+", priority = 2)]
    IntLit,

    /// Float literal (e.g., `3.14`, `-0.5`).
    /// Digits are required on both sides of the dot (no `1.` / `.5`).
    #[regex(r"-?[0-9]+\.[0-9]+")]
    FloatLit,

    /// String literal (e.g., `"hello"`).
    /// Supports escape sequences: \n, \t, \r, \\, \"
    /// The token slice keeps the surrounding quotes and raw escapes.
    /// NOTE(review): the char class also admits raw newlines, so a literal
    /// may span lines — confirm that is intended.
    #[regex(r#""([^"\\]|\\.)*""#)]
    StringLit,

    // =========================================================================
    // Identifiers
    // =========================================================================
    /// Identifier (e.g., `foo`, `myAgent`, `_private`).
    /// Keywords win over this pattern on exact matches only; longer words
    /// such as `agents` remain identifiers.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
    Ident,

    // =========================================================================
    // Punctuation
    // =========================================================================
    #[token("{")]
    LBrace,

    #[token("}")]
    RBrace,

    #[token("(")]
    LParen,

    #[token(")")]
    RParen,

    #[token("[")]
    LBracket,

    #[token("]")]
    RBracket,

    #[token(",")]
    Comma,

    #[token("::")]
    ColonColon,

    #[token(":")]
    Colon,

    #[token(".")]
    Dot,

    #[token("->")]
    Arrow,

    #[token("=>")]
    FatArrow,

    /// Annotation marker for test attributes.
    #[token("@")]
    At,

    // =========================================================================
    // Operators
    // =========================================================================
    #[token("=")]
    Eq,

    #[token("==")]
    EqEq,

    #[token("!=")]
    Ne,

    #[token("<")]
    Lt,

    #[token(">")]
    Gt,

    #[token("<=")]
    Le,

    #[token(">=")]
    Ge,

    #[token("+")]
    Plus,

    #[token("-")]
    Minus,

    #[token("*")]
    Star,

    #[token("/")]
    Slash,

    #[token("!")]
    Bang,

    #[token("&&")]
    And,

    #[token("||")]
    Or,

    /// Single pipe for closure parameters: `|`
    #[token("|")]
    Pipe,

    /// String concatenation operator.
    #[token("++")]
    PlusPlus,

    /// Modulo/remainder operator.
    #[token("%")]
    Percent,

    /// Statement terminator.
    #[token(";")]
    Semicolon,
}
319
320impl Token {
321    /// Returns true if this token is a keyword.
322    #[must_use]
323    pub fn is_keyword(&self) -> bool {
324        matches!(
325            self,
326            Token::KwAgent
327                | Token::KwBelief
328                | Token::KwOn
329                | Token::KwStart
330                | Token::KwStop
331                | Token::KwMessage
332                | Token::KwInfer
333                | Token::KwSpawn
334                | Token::KwAwait
335                | Token::KwSend
336                | Token::KwEmit
337                | Token::KwRun
338                | Token::KwFn
339                | Token::KwLet
340                | Token::KwReturn
341                | Token::KwIf
342                | Token::KwElse
343                | Token::KwFor
344                | Token::KwWhile
345                | Token::KwLoop
346                | Token::KwBreak
347                | Token::KwIn
348                | Token::KwSelf
349                | Token::KwTrue
350                | Token::KwFalse
351                | Token::KwMod
352                | Token::KwUse
353                | Token::KwPub
354                | Token::KwAs
355                | Token::KwSuper
356                | Token::KwRecord
357                | Token::KwEnum
358                | Token::KwMatch
359                | Token::KwConst
360                | Token::KwReceives
361                | Token::KwReceive
362                | Token::KwFails
363                | Token::KwTry
364                | Token::KwCatch
365                | Token::KwError
366                | Token::KwTool
367        )
368    }
369
370    /// Returns true if this token is a type keyword.
371    #[must_use]
372    pub fn is_type_keyword(&self) -> bool {
373        matches!(
374            self,
375            Token::TyInt
376                | Token::TyFloat
377                | Token::TyBool
378                | Token::TyString
379                | Token::TyUnit
380                | Token::TyList
381                | Token::TyOption
382                | Token::TyInferred
383                | Token::TyAgent
384                | Token::TyError
385                | Token::TyErrorKind
386                | Token::TyFn
387                | Token::TyMap
388                | Token::TyResult
389        )
390    }
391
392    /// Returns true if this token is a literal.
393    #[must_use]
394    pub fn is_literal(&self) -> bool {
395        matches!(
396            self,
397            Token::IntLit | Token::FloatLit | Token::StringLit | Token::KwTrue | Token::KwFalse
398        )
399    }
400
401    /// Returns true if this token is an operator.
402    #[must_use]
403    pub fn is_operator(&self) -> bool {
404        matches!(
405            self,
406            Token::Eq
407                | Token::EqEq
408                | Token::Ne
409                | Token::Lt
410                | Token::Gt
411                | Token::Le
412                | Token::Ge
413                | Token::Plus
414                | Token::Minus
415                | Token::Star
416                | Token::Slash
417                | Token::Percent
418                | Token::Bang
419                | Token::And
420                | Token::Or
421                | Token::PlusPlus
422        )
423    }
424}
425
426impl std::fmt::Display for Token {
427    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
428        match self {
429            // Keywords
430            Token::KwAgent => write!(f, "agent"),
431            Token::KwBelief => write!(f, "belief"),
432            Token::KwOn => write!(f, "on"),
433            Token::KwStart => write!(f, "start"),
434            Token::KwStop => write!(f, "stop"),
435            Token::KwMessage => write!(f, "message"),
436            Token::KwInfer => write!(f, "infer"),
437            Token::KwSpawn => write!(f, "spawn"),
438            Token::KwAwait => write!(f, "await"),
439            Token::KwSend => write!(f, "send"),
440            Token::KwEmit => write!(f, "emit"),
441            Token::KwRun => write!(f, "run"),
442            Token::KwFn => write!(f, "fn"),
443            Token::KwLet => write!(f, "let"),
444            Token::KwReturn => write!(f, "return"),
445            Token::KwIf => write!(f, "if"),
446            Token::KwElse => write!(f, "else"),
447            Token::KwFor => write!(f, "for"),
448            Token::KwWhile => write!(f, "while"),
449            Token::KwLoop => write!(f, "loop"),
450            Token::KwBreak => write!(f, "break"),
451            Token::KwIn => write!(f, "in"),
452            Token::KwSelf => write!(f, "self"),
453            Token::KwTrue => write!(f, "true"),
454            Token::KwFalse => write!(f, "false"),
455            Token::KwMod => write!(f, "mod"),
456            Token::KwUse => write!(f, "use"),
457            Token::KwPub => write!(f, "pub"),
458            Token::KwAs => write!(f, "as"),
459            Token::KwSuper => write!(f, "super"),
460            Token::KwRecord => write!(f, "record"),
461            Token::KwEnum => write!(f, "enum"),
462            Token::KwMatch => write!(f, "match"),
463            Token::KwConst => write!(f, "const"),
464            Token::KwReceives => write!(f, "receives"),
465            Token::KwReceive => write!(f, "receive"),
466            Token::KwFails => write!(f, "fails"),
467            Token::KwTry => write!(f, "try"),
468            Token::KwCatch => write!(f, "catch"),
469            Token::KwError => write!(f, "error"),
470            Token::KwTool => write!(f, "tool"),
471            Token::KwTest => write!(f, "test"),
472            Token::KwMock => write!(f, "mock"),
473
474            // Type keywords
475            Token::TyInt => write!(f, "Int"),
476            Token::TyFloat => write!(f, "Float"),
477            Token::TyBool => write!(f, "Bool"),
478            Token::TyString => write!(f, "String"),
479            Token::TyUnit => write!(f, "Unit"),
480            Token::TyList => write!(f, "List"),
481            Token::TyOption => write!(f, "Option"),
482            Token::TyInferred => write!(f, "Inferred"),
483            Token::TyAgent => write!(f, "Agent"),
484            Token::TyError => write!(f, "Error"),
485            Token::TyErrorKind => write!(f, "ErrorKind"),
486            Token::TyFn => write!(f, "Fn"),
487            Token::TyMap => write!(f, "Map"),
488            Token::TyResult => write!(f, "Result"),
489
490            // Literals
491            Token::IntLit => write!(f, "<int>"),
492            Token::FloatLit => write!(f, "<float>"),
493            Token::StringLit => write!(f, "<string>"),
494
495            // Identifier
496            Token::Ident => write!(f, "<ident>"),
497
498            // Punctuation
499            Token::LBrace => write!(f, "{{"),
500            Token::RBrace => write!(f, "}}"),
501            Token::LParen => write!(f, "("),
502            Token::RParen => write!(f, ")"),
503            Token::LBracket => write!(f, "["),
504            Token::RBracket => write!(f, "]"),
505            Token::Comma => write!(f, ","),
506            Token::ColonColon => write!(f, "::"),
507            Token::Colon => write!(f, ":"),
508            Token::Dot => write!(f, "."),
509            Token::Arrow => write!(f, "->"),
510            Token::FatArrow => write!(f, "=>"),
511            Token::At => write!(f, "@"),
512
513            // Operators
514            Token::Eq => write!(f, "="),
515            Token::EqEq => write!(f, "=="),
516            Token::Ne => write!(f, "!="),
517            Token::Lt => write!(f, "<"),
518            Token::Gt => write!(f, ">"),
519            Token::Le => write!(f, "<="),
520            Token::Ge => write!(f, ">="),
521            Token::Plus => write!(f, "+"),
522            Token::Minus => write!(f, "-"),
523            Token::Star => write!(f, "*"),
524            Token::Slash => write!(f, "/"),
525            Token::Bang => write!(f, "!"),
526            Token::And => write!(f, "&&"),
527            Token::Or => write!(f, "||"),
528            Token::Pipe => write!(f, "|"),
529            Token::PlusPlus => write!(f, "++"),
530            Token::Percent => write!(f, "%"),
531            Token::Semicolon => write!(f, ";"),
532        }
533    }
534}
535
536#[cfg(test)]
537mod tests {
538    use super::*;
539
540    #[test]
541    fn lex_keywords() {
542        let mut lexer = Token::lexer("agent belief on start stop message");
543        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
544        assert_eq!(lexer.next(), Some(Ok(Token::KwBelief)));
545        assert_eq!(lexer.next(), Some(Ok(Token::KwOn)));
546        assert_eq!(lexer.next(), Some(Ok(Token::KwStart)));
547        assert_eq!(lexer.next(), Some(Ok(Token::KwStop)));
548        assert_eq!(lexer.next(), Some(Ok(Token::KwMessage)));
549        assert_eq!(lexer.next(), None);
550    }
551
552    #[test]
553    fn lex_more_keywords() {
554        let mut lexer = Token::lexer(
555            "infer spawn await send emit run fn let return if else for in self true false",
556        );
557        assert_eq!(lexer.next(), Some(Ok(Token::KwInfer)));
558        assert_eq!(lexer.next(), Some(Ok(Token::KwSpawn)));
559        assert_eq!(lexer.next(), Some(Ok(Token::KwAwait)));
560        assert_eq!(lexer.next(), Some(Ok(Token::KwSend)));
561        assert_eq!(lexer.next(), Some(Ok(Token::KwEmit)));
562        assert_eq!(lexer.next(), Some(Ok(Token::KwRun)));
563        assert_eq!(lexer.next(), Some(Ok(Token::KwFn)));
564        assert_eq!(lexer.next(), Some(Ok(Token::KwLet)));
565        assert_eq!(lexer.next(), Some(Ok(Token::KwReturn)));
566        assert_eq!(lexer.next(), Some(Ok(Token::KwIf)));
567        assert_eq!(lexer.next(), Some(Ok(Token::KwElse)));
568        assert_eq!(lexer.next(), Some(Ok(Token::KwFor)));
569        assert_eq!(lexer.next(), Some(Ok(Token::KwIn)));
570        assert_eq!(lexer.next(), Some(Ok(Token::KwSelf)));
571        assert_eq!(lexer.next(), Some(Ok(Token::KwTrue)));
572        assert_eq!(lexer.next(), Some(Ok(Token::KwFalse)));
573        assert_eq!(lexer.next(), None);
574    }
575
576    #[test]
577    fn lex_type_keywords() {
578        let mut lexer = Token::lexer("Int Float Bool String Unit List Option Inferred Agent");
579        assert_eq!(lexer.next(), Some(Ok(Token::TyInt)));
580        assert_eq!(lexer.next(), Some(Ok(Token::TyFloat)));
581        assert_eq!(lexer.next(), Some(Ok(Token::TyBool)));
582        assert_eq!(lexer.next(), Some(Ok(Token::TyString)));
583        assert_eq!(lexer.next(), Some(Ok(Token::TyUnit)));
584        assert_eq!(lexer.next(), Some(Ok(Token::TyList)));
585        assert_eq!(lexer.next(), Some(Ok(Token::TyOption)));
586        assert_eq!(lexer.next(), Some(Ok(Token::TyInferred)));
587        assert_eq!(lexer.next(), Some(Ok(Token::TyAgent)));
588        assert_eq!(lexer.next(), None);
589    }
590
591    #[test]
592    fn lex_integer_literals() {
593        let mut lexer = Token::lexer("42 -7 0 123456");
594        assert_eq!(lexer.next(), Some(Ok(Token::IntLit)));
595        assert_eq!(lexer.slice(), "42");
596        assert_eq!(lexer.next(), Some(Ok(Token::IntLit)));
597        assert_eq!(lexer.slice(), "-7");
598        assert_eq!(lexer.next(), Some(Ok(Token::IntLit)));
599        assert_eq!(lexer.slice(), "0");
600        assert_eq!(lexer.next(), Some(Ok(Token::IntLit)));
601        assert_eq!(lexer.slice(), "123456");
602        assert_eq!(lexer.next(), None);
603    }
604
605    #[test]
606    fn lex_float_literals() {
607        let mut lexer = Token::lexer("3.14 -0.5 0.0 123.456");
608        assert_eq!(lexer.next(), Some(Ok(Token::FloatLit)));
609        assert_eq!(lexer.slice(), "3.14");
610        assert_eq!(lexer.next(), Some(Ok(Token::FloatLit)));
611        assert_eq!(lexer.slice(), "-0.5");
612        assert_eq!(lexer.next(), Some(Ok(Token::FloatLit)));
613        assert_eq!(lexer.slice(), "0.0");
614        assert_eq!(lexer.next(), Some(Ok(Token::FloatLit)));
615        assert_eq!(lexer.slice(), "123.456");
616        assert_eq!(lexer.next(), None);
617    }
618
619    #[test]
620    fn lex_string_literals() {
621        let mut lexer = Token::lexer(r#""hello" "world" "with spaces""#);
622        assert_eq!(lexer.next(), Some(Ok(Token::StringLit)));
623        assert_eq!(lexer.slice(), r#""hello""#);
624        assert_eq!(lexer.next(), Some(Ok(Token::StringLit)));
625        assert_eq!(lexer.slice(), r#""world""#);
626        assert_eq!(lexer.next(), Some(Ok(Token::StringLit)));
627        assert_eq!(lexer.slice(), r#""with spaces""#);
628        assert_eq!(lexer.next(), None);
629    }
630
631    #[test]
632    fn lex_string_with_escapes() {
633        let mut lexer = Token::lexer(r#""hello\nworld" "tab\there" "quote\"here""#);
634        assert_eq!(lexer.next(), Some(Ok(Token::StringLit)));
635        assert_eq!(lexer.slice(), r#""hello\nworld""#);
636        assert_eq!(lexer.next(), Some(Ok(Token::StringLit)));
637        assert_eq!(lexer.slice(), r#""tab\there""#);
638        assert_eq!(lexer.next(), Some(Ok(Token::StringLit)));
639        assert_eq!(lexer.slice(), r#""quote\"here""#);
640        assert_eq!(lexer.next(), None);
641    }
642
643    #[test]
644    fn lex_identifiers() {
645        let mut lexer = Token::lexer("foo bar _private myAgent agent2");
646        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
647        assert_eq!(lexer.slice(), "foo");
648        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
649        assert_eq!(lexer.slice(), "bar");
650        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
651        assert_eq!(lexer.slice(), "_private");
652        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
653        assert_eq!(lexer.slice(), "myAgent");
654        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
655        assert_eq!(lexer.slice(), "agent2");
656        assert_eq!(lexer.next(), None);
657    }
658
659    #[test]
660    fn keyword_vs_identifier() {
661        // "agent" is a keyword, "agent_name" is an identifier
662        let mut lexer = Token::lexer("agent agent_name agents");
663        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
664        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
665        assert_eq!(lexer.slice(), "agent_name");
666        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
667        assert_eq!(lexer.slice(), "agents");
668        assert_eq!(lexer.next(), None);
669    }
670
671    #[test]
672    fn lex_punctuation() {
673        let mut lexer = Token::lexer("{ } ( ) [ ] , : . ->");
674        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
675        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
676        assert_eq!(lexer.next(), Some(Ok(Token::LParen)));
677        assert_eq!(lexer.next(), Some(Ok(Token::RParen)));
678        assert_eq!(lexer.next(), Some(Ok(Token::LBracket)));
679        assert_eq!(lexer.next(), Some(Ok(Token::RBracket)));
680        assert_eq!(lexer.next(), Some(Ok(Token::Comma)));
681        assert_eq!(lexer.next(), Some(Ok(Token::Colon)));
682        assert_eq!(lexer.next(), Some(Ok(Token::Dot)));
683        assert_eq!(lexer.next(), Some(Ok(Token::Arrow)));
684        assert_eq!(lexer.next(), None);
685    }
686
687    #[test]
688    fn lex_operators() {
689        let mut lexer = Token::lexer("= == != < > <= >= + - * / % ! && || ++");
690        assert_eq!(lexer.next(), Some(Ok(Token::Eq)));
691        assert_eq!(lexer.next(), Some(Ok(Token::EqEq)));
692        assert_eq!(lexer.next(), Some(Ok(Token::Ne)));
693        assert_eq!(lexer.next(), Some(Ok(Token::Lt)));
694        assert_eq!(lexer.next(), Some(Ok(Token::Gt)));
695        assert_eq!(lexer.next(), Some(Ok(Token::Le)));
696        assert_eq!(lexer.next(), Some(Ok(Token::Ge)));
697        assert_eq!(lexer.next(), Some(Ok(Token::Plus)));
698        assert_eq!(lexer.next(), Some(Ok(Token::Minus)));
699        assert_eq!(lexer.next(), Some(Ok(Token::Star)));
700        assert_eq!(lexer.next(), Some(Ok(Token::Slash)));
701        assert_eq!(lexer.next(), Some(Ok(Token::Percent)));
702        assert_eq!(lexer.next(), Some(Ok(Token::Bang)));
703        assert_eq!(lexer.next(), Some(Ok(Token::And)));
704        assert_eq!(lexer.next(), Some(Ok(Token::Or)));
705        assert_eq!(lexer.next(), Some(Ok(Token::PlusPlus)));
706        assert_eq!(lexer.next(), None);
707    }
708
709    #[test]
710    fn skip_whitespace() {
711        let mut lexer = Token::lexer("  agent   belief\n\ttrue  ");
712        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
713        assert_eq!(lexer.next(), Some(Ok(Token::KwBelief)));
714        assert_eq!(lexer.next(), Some(Ok(Token::KwTrue)));
715        assert_eq!(lexer.next(), None);
716    }
717
718    #[test]
719    fn skip_comments() {
720        let mut lexer = Token::lexer("agent // this is a comment\nbelief");
721        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
722        assert_eq!(lexer.next(), Some(Ok(Token::KwBelief)));
723        assert_eq!(lexer.next(), None);
724    }
725
726    #[test]
727    fn comment_at_end() {
728        let mut lexer = Token::lexer("agent // comment at end");
729        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
730        assert_eq!(lexer.next(), None);
731    }
732
733    #[test]
734    fn lex_agent_declaration() {
735        let source = r#"
736            agent Researcher {
737                belief topic: String
738
739                on start {
740                    let result: Inferred<String> = infer("test")
741                    emit(result)
742                }
743            }
744        "#;
745        let tokens: Vec<_> = Token::lexer(source)
746            .map(|r| r.expect("valid token"))
747            .collect();
748
749        assert_eq!(tokens[0], Token::KwAgent);
750        assert_eq!(tokens[1], Token::Ident); // Researcher
751        assert_eq!(tokens[2], Token::LBrace);
752        assert_eq!(tokens[3], Token::KwBelief);
753        assert_eq!(tokens[4], Token::Ident); // topic
754        assert_eq!(tokens[5], Token::Colon);
755        assert_eq!(tokens[6], Token::TyString);
756        assert_eq!(tokens[7], Token::KwOn);
757        assert_eq!(tokens[8], Token::KwStart);
758        assert_eq!(tokens[9], Token::LBrace);
759        assert_eq!(tokens[10], Token::KwLet);
760    }
761
762    #[test]
763    fn is_keyword_helper() {
764        assert!(Token::KwAgent.is_keyword());
765        assert!(Token::KwLet.is_keyword());
766        assert!(!Token::TyInt.is_keyword());
767        assert!(!Token::Ident.is_keyword());
768    }
769
770    #[test]
771    fn is_type_keyword_helper() {
772        assert!(Token::TyInt.is_type_keyword());
773        assert!(Token::TyAgent.is_type_keyword());
774        assert!(!Token::KwAgent.is_type_keyword());
775        assert!(!Token::Ident.is_type_keyword());
776    }
777
778    #[test]
779    fn is_literal_helper() {
780        assert!(Token::IntLit.is_literal());
781        assert!(Token::FloatLit.is_literal());
782        assert!(Token::StringLit.is_literal());
783        assert!(Token::KwTrue.is_literal());
784        assert!(Token::KwFalse.is_literal());
785        assert!(!Token::Ident.is_literal());
786    }
787
788    #[test]
789    fn is_operator_helper() {
790        assert!(Token::Plus.is_operator());
791        assert!(Token::EqEq.is_operator());
792        assert!(Token::PlusPlus.is_operator());
793        assert!(!Token::LBrace.is_operator());
794        assert!(!Token::Ident.is_operator());
795    }
796
797    #[test]
798    fn lex_module_keywords() {
799        let mut lexer = Token::lexer("mod use pub as super");
800        assert_eq!(lexer.next(), Some(Ok(Token::KwMod)));
801        assert_eq!(lexer.next(), Some(Ok(Token::KwUse)));
802        assert_eq!(lexer.next(), Some(Ok(Token::KwPub)));
803        assert_eq!(lexer.next(), Some(Ok(Token::KwAs)));
804        assert_eq!(lexer.next(), Some(Ok(Token::KwSuper)));
805        assert_eq!(lexer.next(), None);
806    }
807
808    #[test]
809    fn lex_path_separator() {
810        let mut lexer = Token::lexer("agents::Researcher");
811        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
812        assert_eq!(lexer.slice(), "agents");
813        assert_eq!(lexer.next(), Some(Ok(Token::ColonColon)));
814        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
815        assert_eq!(lexer.slice(), "Researcher");
816        assert_eq!(lexer.next(), None);
817    }
818
819    #[test]
820    fn lex_use_statement() {
821        let mut lexer = Token::lexer("use agents::{Researcher, Coordinator as Coord}");
822        assert_eq!(lexer.next(), Some(Ok(Token::KwUse)));
823        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // agents
824        assert_eq!(lexer.next(), Some(Ok(Token::ColonColon)));
825        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
826        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Researcher
827        assert_eq!(lexer.next(), Some(Ok(Token::Comma)));
828        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Coordinator
829        assert_eq!(lexer.next(), Some(Ok(Token::KwAs)));
830        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Coord
831        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
832        assert_eq!(lexer.next(), None);
833    }
834
835    #[test]
836    fn lex_pub_agent() {
837        let mut lexer = Token::lexer("pub agent Researcher");
838        assert_eq!(lexer.next(), Some(Ok(Token::KwPub)));
839        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
840        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
841        assert_eq!(lexer.next(), None);
842    }
843
844    #[test]
845    fn token_display() {
846        assert_eq!(format!("{}", Token::KwAgent), "agent");
847        assert_eq!(format!("{}", Token::TyInt), "Int");
848        assert_eq!(format!("{}", Token::IntLit), "<int>");
849        assert_eq!(format!("{}", Token::Ident), "<ident>");
850        assert_eq!(format!("{}", Token::LBrace), "{");
851        assert_eq!(format!("{}", Token::PlusPlus), "++");
852    }
853
854    #[test]
855    fn lex_type_keywords_record_enum_match_const() {
856        let mut lexer = Token::lexer("record enum match const");
857        assert_eq!(lexer.next(), Some(Ok(Token::KwRecord)));
858        assert_eq!(lexer.next(), Some(Ok(Token::KwEnum)));
859        assert_eq!(lexer.next(), Some(Ok(Token::KwMatch)));
860        assert_eq!(lexer.next(), Some(Ok(Token::KwConst)));
861        assert_eq!(lexer.next(), None);
862    }
863
864    #[test]
865    fn lex_fat_arrow() {
866        let mut lexer = Token::lexer("=> -> =");
867        assert_eq!(lexer.next(), Some(Ok(Token::FatArrow)));
868        assert_eq!(lexer.next(), Some(Ok(Token::Arrow)));
869        assert_eq!(lexer.next(), Some(Ok(Token::Eq)));
870        assert_eq!(lexer.next(), None);
871    }
872
873    #[test]
874    fn lex_match_expression() {
875        let mut lexer = Token::lexer("match status { Active => 1, Inactive => 0 }");
876        assert_eq!(lexer.next(), Some(Ok(Token::KwMatch)));
877        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // status
878        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
879        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Active
880        assert_eq!(lexer.next(), Some(Ok(Token::FatArrow)));
881        assert_eq!(lexer.next(), Some(Ok(Token::IntLit))); // 1
882        assert_eq!(lexer.next(), Some(Ok(Token::Comma)));
883        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Inactive
884        assert_eq!(lexer.next(), Some(Ok(Token::FatArrow)));
885        assert_eq!(lexer.next(), Some(Ok(Token::IntLit))); // 0
886        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
887        assert_eq!(lexer.next(), None);
888    }
889
890    #[test]
891    fn lex_record_declaration() {
892        let mut lexer = Token::lexer("record Point { x: Int, y: Int }");
893        assert_eq!(lexer.next(), Some(Ok(Token::KwRecord)));
894        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Point
895        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
896        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // x
897        assert_eq!(lexer.next(), Some(Ok(Token::Colon)));
898        assert_eq!(lexer.next(), Some(Ok(Token::TyInt)));
899        assert_eq!(lexer.next(), Some(Ok(Token::Comma)));
900        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // y
901        assert_eq!(lexer.next(), Some(Ok(Token::Colon)));
902        assert_eq!(lexer.next(), Some(Ok(Token::TyInt)));
903        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
904        assert_eq!(lexer.next(), None);
905    }
906
907    #[test]
908    fn lex_enum_declaration() {
909        let mut lexer = Token::lexer("enum Status { Active, Pending, Done }");
910        assert_eq!(lexer.next(), Some(Ok(Token::KwEnum)));
911        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Status
912        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
913        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Active
914        assert_eq!(lexer.next(), Some(Ok(Token::Comma)));
915        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Pending
916        assert_eq!(lexer.next(), Some(Ok(Token::Comma)));
917        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Done
918        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
919        assert_eq!(lexer.next(), None);
920    }
921
922    #[test]
923    fn lex_const_declaration() {
924        let mut lexer = Token::lexer("const MAX_RETRIES: Int = 3");
925        assert_eq!(lexer.next(), Some(Ok(Token::KwConst)));
926        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // MAX_RETRIES
927        assert_eq!(lexer.next(), Some(Ok(Token::Colon)));
928        assert_eq!(lexer.next(), Some(Ok(Token::TyInt)));
929        assert_eq!(lexer.next(), Some(Ok(Token::Eq)));
930        assert_eq!(lexer.next(), Some(Ok(Token::IntLit))); // 3
931        assert_eq!(lexer.next(), None);
932    }
933
934    #[test]
935    fn new_keywords_are_keywords() {
936        assert!(Token::KwRecord.is_keyword());
937        assert!(Token::KwEnum.is_keyword());
938        assert!(Token::KwMatch.is_keyword());
939        assert!(Token::KwConst.is_keyword());
940    }
941
942    #[test]
943    fn lex_loop_break() {
944        let mut lexer = Token::lexer("loop { break }");
945        assert_eq!(lexer.next(), Some(Ok(Token::KwLoop)));
946        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
947        assert_eq!(lexer.next(), Some(Ok(Token::KwBreak)));
948        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
949        assert_eq!(lexer.next(), None);
950    }
951
952    #[test]
953    fn lex_receives_receive() {
954        let mut lexer = Token::lexer("agent Worker receives WorkerMsg { receive }");
955        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
956        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Worker
957        assert_eq!(lexer.next(), Some(Ok(Token::KwReceives)));
958        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // WorkerMsg
959        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
960        assert_eq!(lexer.next(), Some(Ok(Token::KwReceive)));
961        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
962        assert_eq!(lexer.next(), None);
963    }
964
965    #[test]
966    fn rfc6_keywords_are_keywords() {
967        assert!(Token::KwLoop.is_keyword());
968        assert!(Token::KwBreak.is_keyword());
969        assert!(Token::KwReceives.is_keyword());
970        assert!(Token::KwReceive.is_keyword());
971    }
972
973    #[test]
974    fn lex_error_handling_keywords() {
975        let mut lexer = Token::lexer("fails try catch error");
976        assert_eq!(lexer.next(), Some(Ok(Token::KwFails)));
977        assert_eq!(lexer.next(), Some(Ok(Token::KwTry)));
978        assert_eq!(lexer.next(), Some(Ok(Token::KwCatch)));
979        assert_eq!(lexer.next(), Some(Ok(Token::KwError)));
980        assert_eq!(lexer.next(), None);
981    }
982
983    #[test]
984    fn lex_try_catch_expression() {
985        let mut lexer = Token::lexer("let x = try infer(prompt) catch { fallback }");
986        assert_eq!(lexer.next(), Some(Ok(Token::KwLet)));
987        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // x
988        assert_eq!(lexer.next(), Some(Ok(Token::Eq)));
989        assert_eq!(lexer.next(), Some(Ok(Token::KwTry)));
990        assert_eq!(lexer.next(), Some(Ok(Token::KwInfer)));
991        assert_eq!(lexer.next(), Some(Ok(Token::LParen)));
992        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // prompt
993        assert_eq!(lexer.next(), Some(Ok(Token::RParen)));
994        assert_eq!(lexer.next(), Some(Ok(Token::KwCatch)));
995        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
996        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // fallback
997        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
998        assert_eq!(lexer.next(), None);
999    }
1000
1001    #[test]
1002    fn lex_fails_function() {
1003        let mut lexer = Token::lexer("fn fetch(url: String) -> String fails { }");
1004        assert_eq!(lexer.next(), Some(Ok(Token::KwFn)));
1005        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // fetch
1006        assert_eq!(lexer.next(), Some(Ok(Token::LParen)));
1007        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // url
1008        assert_eq!(lexer.next(), Some(Ok(Token::Colon)));
1009        assert_eq!(lexer.next(), Some(Ok(Token::TyString)));
1010        assert_eq!(lexer.next(), Some(Ok(Token::RParen)));
1011        assert_eq!(lexer.next(), Some(Ok(Token::Arrow)));
1012        assert_eq!(lexer.next(), Some(Ok(Token::TyString)));
1013        assert_eq!(lexer.next(), Some(Ok(Token::KwFails)));
1014        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
1015        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
1016        assert_eq!(lexer.next(), None);
1017    }
1018
1019    #[test]
1020    fn lex_on_error_handler() {
1021        let mut lexer = Token::lexer("on error(e) { emit(fallback) }");
1022        assert_eq!(lexer.next(), Some(Ok(Token::KwOn)));
1023        assert_eq!(lexer.next(), Some(Ok(Token::KwError)));
1024        assert_eq!(lexer.next(), Some(Ok(Token::LParen)));
1025        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // e
1026        assert_eq!(lexer.next(), Some(Ok(Token::RParen)));
1027        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
1028        assert_eq!(lexer.next(), Some(Ok(Token::KwEmit)));
1029        assert_eq!(lexer.next(), Some(Ok(Token::LParen)));
1030        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // fallback
1031        assert_eq!(lexer.next(), Some(Ok(Token::RParen)));
1032        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
1033        assert_eq!(lexer.next(), None);
1034    }
1035
1036    #[test]
1037    fn rfc7_keywords_are_keywords() {
1038        assert!(Token::KwFails.is_keyword());
1039        assert!(Token::KwTry.is_keyword());
1040        assert!(Token::KwCatch.is_keyword());
1041        assert!(Token::KwError.is_keyword());
1042    }
1043
1044    // =========================================================================
1045    // RFC-0009: Closures
1046    // =========================================================================
1047
1048    #[test]
1049    fn lex_closure_syntax() {
1050        // |x: Int| x + 1
1051        let mut lexer = Token::lexer("|x: Int| x + 1");
1052        assert_eq!(lexer.next(), Some(Ok(Token::Pipe)));
1053        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // x
1054        assert_eq!(lexer.next(), Some(Ok(Token::Colon)));
1055        assert_eq!(lexer.next(), Some(Ok(Token::TyInt)));
1056        assert_eq!(lexer.next(), Some(Ok(Token::Pipe)));
1057        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // x
1058        assert_eq!(lexer.next(), Some(Ok(Token::Plus)));
1059        assert_eq!(lexer.next(), Some(Ok(Token::IntLit)));
1060        assert_eq!(lexer.next(), None);
1061    }
1062
1063    #[test]
1064    fn lex_empty_closure() {
1065        // || 42
1066        let mut lexer = Token::lexer("|| 42");
1067        assert_eq!(lexer.next(), Some(Ok(Token::Or))); // || lexes as Or
1068        assert_eq!(lexer.next(), Some(Ok(Token::IntLit)));
1069        assert_eq!(lexer.next(), None);
1070    }
1071
1072    #[test]
1073    fn lex_fn_type() {
1074        // Fn(Int, String) -> Bool
1075        let mut lexer = Token::lexer("Fn(Int, String) -> Bool");
1076        assert_eq!(lexer.next(), Some(Ok(Token::TyFn)));
1077        assert_eq!(lexer.next(), Some(Ok(Token::LParen)));
1078        assert_eq!(lexer.next(), Some(Ok(Token::TyInt)));
1079        assert_eq!(lexer.next(), Some(Ok(Token::Comma)));
1080        assert_eq!(lexer.next(), Some(Ok(Token::TyString)));
1081        assert_eq!(lexer.next(), Some(Ok(Token::RParen)));
1082        assert_eq!(lexer.next(), Some(Ok(Token::Arrow)));
1083        assert_eq!(lexer.next(), Some(Ok(Token::TyBool)));
1084        assert_eq!(lexer.next(), None);
1085    }
1086
1087    #[test]
1088    fn fn_is_type_keyword() {
1089        assert!(Token::TyFn.is_type_keyword());
1090    }
1091
1092    #[test]
1093    fn pipe_display() {
1094        assert_eq!(format!("{}", Token::Pipe), "|");
1095        assert_eq!(format!("{}", Token::TyFn), "Fn");
1096    }
1097
1098    // =========================================================================
1099    // RFC-0011: Tool Support
1100    // =========================================================================
1101
1102    #[test]
1103    fn lex_tool_keyword() {
1104        let mut lexer = Token::lexer("tool Http { fn get(url: String) -> String }");
1105        assert_eq!(lexer.next(), Some(Ok(Token::KwTool)));
1106        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Http
1107        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
1108        assert_eq!(lexer.next(), Some(Ok(Token::KwFn)));
1109        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // get
1110        assert_eq!(lexer.next(), Some(Ok(Token::LParen)));
1111        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // url
1112        assert_eq!(lexer.next(), Some(Ok(Token::Colon)));
1113        assert_eq!(lexer.next(), Some(Ok(Token::TyString)));
1114        assert_eq!(lexer.next(), Some(Ok(Token::RParen)));
1115        assert_eq!(lexer.next(), Some(Ok(Token::Arrow)));
1116        assert_eq!(lexer.next(), Some(Ok(Token::TyString)));
1117        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
1118        assert_eq!(lexer.next(), None);
1119    }
1120
1121    #[test]
1122    fn tool_is_keyword() {
1123        assert!(Token::KwTool.is_keyword());
1124    }
1125
1126    #[test]
1127    fn lex_agent_use_tool() {
1128        let mut lexer = Token::lexer("agent Fetcher { use Http }");
1129        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
1130        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Fetcher
1131        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
1132        assert_eq!(lexer.next(), Some(Ok(Token::KwUse)));
1133        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Http
1134        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
1135        assert_eq!(lexer.next(), None);
1136    }
1137}