// sage_parser/token.rs
1//! Token definitions for the Sage lexer.
2
3use logos::Logos;
4
/// All tokens in the Sage language.
///
/// Lexing is driven by `logos`. Whitespace and `//` line comments are
/// consumed by the `skip` attributes below, so neither ever appears in the
/// token stream. Variants carry no payload; callers use `lexer.slice()` to
/// recover the matched text (see the tests in this file).
#[derive(Logos, Debug, Clone, PartialEq, Eq, Hash)]
#[logos(skip r"[ \t\r\n]+")]
#[logos(skip r"//[^\n]*")]
pub enum Token {
    // =========================================================================
    // Keywords
    // =========================================================================
    #[token("agent")]
    KwAgent,

    #[token("belief")]
    KwBelief,

    #[token("on")]
    KwOn,

    #[token("start")]
    KwStart,

    #[token("stop")]
    KwStop,

    #[token("message")]
    KwMessage,

    #[token("infer")]
    KwInfer,

    #[token("spawn")]
    KwSpawn,

    #[token("await")]
    KwAwait,

    #[token("send")]
    KwSend,

    #[token("emit")]
    KwEmit,

    #[token("run")]
    KwRun,

    #[token("fn")]
    KwFn,

    #[token("let")]
    KwLet,

    #[token("return")]
    KwReturn,

    #[token("if")]
    KwIf,

    #[token("else")]
    KwElse,

    #[token("for")]
    KwFor,

    #[token("while")]
    KwWhile,

    #[token("loop")]
    KwLoop,

    #[token("break")]
    KwBreak,

    #[token("in")]
    KwIn,

    #[token("self")]
    KwSelf,

    #[token("true")]
    KwTrue,

    #[token("false")]
    KwFalse,

    #[token("mod")]
    KwMod,

    #[token("use")]
    KwUse,

    #[token("pub")]
    KwPub,

    #[token("as")]
    KwAs,

    #[token("super")]
    KwSuper,

    #[token("record")]
    KwRecord,

    #[token("enum")]
    KwEnum,

    #[token("match")]
    KwMatch,

    #[token("const")]
    KwConst,

    // NOTE: "receives" must be declared before/alongside "receive"; logos
    // resolves the overlap by longest match ("receives" wins when present).
    #[token("receives")]
    KwReceives,

    #[token("receive")]
    KwReceive,

    #[token("fail")]
    KwFail,

    #[token("fails")]
    KwFails,

    #[token("timeout")]
    KwTimeout,

    #[token("retry")]
    KwRetry,

    #[token("delay")]
    KwDelay,

    #[token("try")]
    KwTry,

    #[token("catch")]
    KwCatch,

    #[token("error")]
    KwError,

    #[token("tool")]
    KwTool,

    /// RFC-0012: Test declaration keyword.
    #[token("test")]
    KwTest,

    /// RFC-0012: Mock keyword for LLM mocking.
    #[token("mock")]
    KwMock,

    /// Trace keyword for emitting trace events.
    #[token("trace")]
    KwTrace,

    // =========================================================================
    // Type keywords
    // =========================================================================
    #[token("Int")]
    TyInt,

    #[token("Float")]
    TyFloat,

    #[token("Bool")]
    TyBool,

    #[token("String")]
    TyString,

    #[token("Unit")]
    TyUnit,

    #[token("List")]
    TyList,

    #[token("Option")]
    TyOption,

    #[token("Inferred")]
    TyInferred,

    #[token("Agent")]
    TyAgent,

    #[token("Error")]
    TyError,

    #[token("ErrorKind")]
    TyErrorKind,

    /// Function type keyword: `Fn`
    #[token("Fn")]
    TyFn,

    /// Map type keyword: `Map`
    #[token("Map")]
    TyMap,

    /// Result type keyword: `Result`
    #[token("Result")]
    TyResult,

    // =========================================================================
    // Literals
    // =========================================================================
    /// Integer literal (e.g., `42`, `-7`).
    ///
    /// NOTE(review): the leading `-?` means the sign is part of the literal,
    /// so input like `1-2` lexes as two `IntLit`s rather than
    /// `IntLit Minus IntLit` — presumably the parser compensates; confirm.
    #[regex(r"-?[0-9]+", priority = 2)]
    IntLit,

    /// Float literal (e.g., `3.14`, `-0.5`).
    /// Same leading-sign caveat as `IntLit` applies.
    #[regex(r"-?[0-9]+\.[0-9]+")]
    FloatLit,

    /// String literal (e.g., `"hello"`).
    /// Supports escape sequences: \n, \t, \r, \\, \"
    /// The raw slice (including quotes and escapes) is kept; unescaping
    /// happens downstream.
    #[regex(r#""([^"\\]|\\.)*""#)]
    StringLit,

    // =========================================================================
    // Identifiers
    // =========================================================================
    /// Identifier (e.g., `foo`, `myAgent`, `_private`).
    ///
    /// Exact keyword matches take precedence over this regex (see the
    /// `keyword_vs_identifier` test), so `agent` is `KwAgent` but
    /// `agent_name` and `agents` are `Ident`.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
    Ident,

    // =========================================================================
    // Punctuation
    // =========================================================================
    #[token("{")]
    LBrace,

    #[token("}")]
    RBrace,

    #[token("(")]
    LParen,

    #[token(")")]
    RParen,

    #[token("[")]
    LBracket,

    #[token("]")]
    RBracket,

    #[token(",")]
    Comma,

    // `::` is declared before `:`; logos prefers the longer match anyway.
    #[token("::")]
    ColonColon,

    #[token(":")]
    Colon,

    #[token(".")]
    Dot,

    #[token("->")]
    Arrow,

    #[token("=>")]
    FatArrow,

    /// Annotation marker for test attributes.
    #[token("@")]
    At,

    // =========================================================================
    // Operators
    // =========================================================================
    #[token("=")]
    Eq,

    #[token("==")]
    EqEq,

    #[token("!=")]
    Ne,

    #[token("<")]
    Lt,

    #[token(">")]
    Gt,

    #[token("<=")]
    Le,

    #[token(">=")]
    Ge,

    #[token("+")]
    Plus,

    #[token("-")]
    Minus,

    #[token("*")]
    Star,

    #[token("/")]
    Slash,

    #[token("!")]
    Bang,

    #[token("&&")]
    And,

    #[token("||")]
    Or,

    /// Single pipe for closure parameters: `|`
    Pipe,

    /// String concatenation operator.
    #[token("++")]
    PlusPlus,

    /// Modulo/remainder operator.
    #[token("%")]
    Percent,

    /// Statement terminator.
    #[token(";")]
    Semicolon,
}
335
336impl Token {
337    /// Returns true if this token is a keyword.
338    #[must_use]
339    pub fn is_keyword(&self) -> bool {
340        matches!(
341            self,
342            Token::KwAgent
343                | Token::KwBelief
344                | Token::KwOn
345                | Token::KwStart
346                | Token::KwStop
347                | Token::KwMessage
348                | Token::KwInfer
349                | Token::KwSpawn
350                | Token::KwAwait
351                | Token::KwSend
352                | Token::KwEmit
353                | Token::KwRun
354                | Token::KwFn
355                | Token::KwLet
356                | Token::KwReturn
357                | Token::KwIf
358                | Token::KwElse
359                | Token::KwFor
360                | Token::KwWhile
361                | Token::KwLoop
362                | Token::KwBreak
363                | Token::KwIn
364                | Token::KwSelf
365                | Token::KwTrue
366                | Token::KwFalse
367                | Token::KwMod
368                | Token::KwUse
369                | Token::KwPub
370                | Token::KwAs
371                | Token::KwSuper
372                | Token::KwRecord
373                | Token::KwEnum
374                | Token::KwMatch
375                | Token::KwConst
376                | Token::KwReceives
377                | Token::KwReceive
378                | Token::KwFail
379                | Token::KwFails
380                | Token::KwTimeout
381                | Token::KwRetry
382                | Token::KwDelay
383                | Token::KwTry
384                | Token::KwCatch
385                | Token::KwError
386                | Token::KwTool
387                | Token::KwTrace
388        )
389    }
390
391    /// Returns true if this token is a type keyword.
392    #[must_use]
393    pub fn is_type_keyword(&self) -> bool {
394        matches!(
395            self,
396            Token::TyInt
397                | Token::TyFloat
398                | Token::TyBool
399                | Token::TyString
400                | Token::TyUnit
401                | Token::TyList
402                | Token::TyOption
403                | Token::TyInferred
404                | Token::TyAgent
405                | Token::TyError
406                | Token::TyErrorKind
407                | Token::TyFn
408                | Token::TyMap
409                | Token::TyResult
410        )
411    }
412
413    /// Returns true if this token is a literal.
414    #[must_use]
415    pub fn is_literal(&self) -> bool {
416        matches!(
417            self,
418            Token::IntLit | Token::FloatLit | Token::StringLit | Token::KwTrue | Token::KwFalse
419        )
420    }
421
422    /// Returns true if this token is an operator.
423    #[must_use]
424    pub fn is_operator(&self) -> bool {
425        matches!(
426            self,
427            Token::Eq
428                | Token::EqEq
429                | Token::Ne
430                | Token::Lt
431                | Token::Gt
432                | Token::Le
433                | Token::Ge
434                | Token::Plus
435                | Token::Minus
436                | Token::Star
437                | Token::Slash
438                | Token::Percent
439                | Token::Bang
440                | Token::And
441                | Token::Or
442                | Token::PlusPlus
443        )
444    }
445}
446
447impl std::fmt::Display for Token {
448    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
449        match self {
450            // Keywords
451            Token::KwAgent => write!(f, "agent"),
452            Token::KwBelief => write!(f, "belief"),
453            Token::KwOn => write!(f, "on"),
454            Token::KwStart => write!(f, "start"),
455            Token::KwStop => write!(f, "stop"),
456            Token::KwMessage => write!(f, "message"),
457            Token::KwInfer => write!(f, "infer"),
458            Token::KwSpawn => write!(f, "spawn"),
459            Token::KwAwait => write!(f, "await"),
460            Token::KwSend => write!(f, "send"),
461            Token::KwEmit => write!(f, "emit"),
462            Token::KwRun => write!(f, "run"),
463            Token::KwFn => write!(f, "fn"),
464            Token::KwLet => write!(f, "let"),
465            Token::KwReturn => write!(f, "return"),
466            Token::KwIf => write!(f, "if"),
467            Token::KwElse => write!(f, "else"),
468            Token::KwFor => write!(f, "for"),
469            Token::KwWhile => write!(f, "while"),
470            Token::KwLoop => write!(f, "loop"),
471            Token::KwBreak => write!(f, "break"),
472            Token::KwIn => write!(f, "in"),
473            Token::KwSelf => write!(f, "self"),
474            Token::KwTrue => write!(f, "true"),
475            Token::KwFalse => write!(f, "false"),
476            Token::KwMod => write!(f, "mod"),
477            Token::KwUse => write!(f, "use"),
478            Token::KwPub => write!(f, "pub"),
479            Token::KwAs => write!(f, "as"),
480            Token::KwSuper => write!(f, "super"),
481            Token::KwRecord => write!(f, "record"),
482            Token::KwEnum => write!(f, "enum"),
483            Token::KwMatch => write!(f, "match"),
484            Token::KwConst => write!(f, "const"),
485            Token::KwReceives => write!(f, "receives"),
486            Token::KwReceive => write!(f, "receive"),
487            Token::KwFail => write!(f, "fail"),
488            Token::KwFails => write!(f, "fails"),
489            Token::KwTimeout => write!(f, "timeout"),
490            Token::KwRetry => write!(f, "retry"),
491            Token::KwDelay => write!(f, "delay"),
492            Token::KwTry => write!(f, "try"),
493            Token::KwCatch => write!(f, "catch"),
494            Token::KwError => write!(f, "error"),
495            Token::KwTool => write!(f, "tool"),
496            Token::KwTest => write!(f, "test"),
497            Token::KwMock => write!(f, "mock"),
498            Token::KwTrace => write!(f, "trace"),
499
500            // Type keywords
501            Token::TyInt => write!(f, "Int"),
502            Token::TyFloat => write!(f, "Float"),
503            Token::TyBool => write!(f, "Bool"),
504            Token::TyString => write!(f, "String"),
505            Token::TyUnit => write!(f, "Unit"),
506            Token::TyList => write!(f, "List"),
507            Token::TyOption => write!(f, "Option"),
508            Token::TyInferred => write!(f, "Inferred"),
509            Token::TyAgent => write!(f, "Agent"),
510            Token::TyError => write!(f, "Error"),
511            Token::TyErrorKind => write!(f, "ErrorKind"),
512            Token::TyFn => write!(f, "Fn"),
513            Token::TyMap => write!(f, "Map"),
514            Token::TyResult => write!(f, "Result"),
515
516            // Literals
517            Token::IntLit => write!(f, "<int>"),
518            Token::FloatLit => write!(f, "<float>"),
519            Token::StringLit => write!(f, "<string>"),
520
521            // Identifier
522            Token::Ident => write!(f, "<ident>"),
523
524            // Punctuation
525            Token::LBrace => write!(f, "{{"),
526            Token::RBrace => write!(f, "}}"),
527            Token::LParen => write!(f, "("),
528            Token::RParen => write!(f, ")"),
529            Token::LBracket => write!(f, "["),
530            Token::RBracket => write!(f, "]"),
531            Token::Comma => write!(f, ","),
532            Token::ColonColon => write!(f, "::"),
533            Token::Colon => write!(f, ":"),
534            Token::Dot => write!(f, "."),
535            Token::Arrow => write!(f, "->"),
536            Token::FatArrow => write!(f, "=>"),
537            Token::At => write!(f, "@"),
538
539            // Operators
540            Token::Eq => write!(f, "="),
541            Token::EqEq => write!(f, "=="),
542            Token::Ne => write!(f, "!="),
543            Token::Lt => write!(f, "<"),
544            Token::Gt => write!(f, ">"),
545            Token::Le => write!(f, "<="),
546            Token::Ge => write!(f, ">="),
547            Token::Plus => write!(f, "+"),
548            Token::Minus => write!(f, "-"),
549            Token::Star => write!(f, "*"),
550            Token::Slash => write!(f, "/"),
551            Token::Bang => write!(f, "!"),
552            Token::And => write!(f, "&&"),
553            Token::Or => write!(f, "||"),
554            Token::Pipe => write!(f, "|"),
555            Token::PlusPlus => write!(f, "++"),
556            Token::Percent => write!(f, "%"),
557            Token::Semicolon => write!(f, ";"),
558        }
559    }
560}
561
562#[cfg(test)]
563mod tests {
564    use super::*;
565
566    #[test]
567    fn lex_keywords() {
568        let mut lexer = Token::lexer("agent belief on start stop message");
569        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
570        assert_eq!(lexer.next(), Some(Ok(Token::KwBelief)));
571        assert_eq!(lexer.next(), Some(Ok(Token::KwOn)));
572        assert_eq!(lexer.next(), Some(Ok(Token::KwStart)));
573        assert_eq!(lexer.next(), Some(Ok(Token::KwStop)));
574        assert_eq!(lexer.next(), Some(Ok(Token::KwMessage)));
575        assert_eq!(lexer.next(), None);
576    }
577
578    #[test]
579    fn lex_more_keywords() {
580        let mut lexer = Token::lexer(
581            "infer spawn await send emit run fn let return if else for in self true false",
582        );
583        assert_eq!(lexer.next(), Some(Ok(Token::KwInfer)));
584        assert_eq!(lexer.next(), Some(Ok(Token::KwSpawn)));
585        assert_eq!(lexer.next(), Some(Ok(Token::KwAwait)));
586        assert_eq!(lexer.next(), Some(Ok(Token::KwSend)));
587        assert_eq!(lexer.next(), Some(Ok(Token::KwEmit)));
588        assert_eq!(lexer.next(), Some(Ok(Token::KwRun)));
589        assert_eq!(lexer.next(), Some(Ok(Token::KwFn)));
590        assert_eq!(lexer.next(), Some(Ok(Token::KwLet)));
591        assert_eq!(lexer.next(), Some(Ok(Token::KwReturn)));
592        assert_eq!(lexer.next(), Some(Ok(Token::KwIf)));
593        assert_eq!(lexer.next(), Some(Ok(Token::KwElse)));
594        assert_eq!(lexer.next(), Some(Ok(Token::KwFor)));
595        assert_eq!(lexer.next(), Some(Ok(Token::KwIn)));
596        assert_eq!(lexer.next(), Some(Ok(Token::KwSelf)));
597        assert_eq!(lexer.next(), Some(Ok(Token::KwTrue)));
598        assert_eq!(lexer.next(), Some(Ok(Token::KwFalse)));
599        assert_eq!(lexer.next(), None);
600    }
601
602    #[test]
603    fn lex_type_keywords() {
604        let mut lexer = Token::lexer("Int Float Bool String Unit List Option Inferred Agent");
605        assert_eq!(lexer.next(), Some(Ok(Token::TyInt)));
606        assert_eq!(lexer.next(), Some(Ok(Token::TyFloat)));
607        assert_eq!(lexer.next(), Some(Ok(Token::TyBool)));
608        assert_eq!(lexer.next(), Some(Ok(Token::TyString)));
609        assert_eq!(lexer.next(), Some(Ok(Token::TyUnit)));
610        assert_eq!(lexer.next(), Some(Ok(Token::TyList)));
611        assert_eq!(lexer.next(), Some(Ok(Token::TyOption)));
612        assert_eq!(lexer.next(), Some(Ok(Token::TyInferred)));
613        assert_eq!(lexer.next(), Some(Ok(Token::TyAgent)));
614        assert_eq!(lexer.next(), None);
615    }
616
617    #[test]
618    fn lex_integer_literals() {
619        let mut lexer = Token::lexer("42 -7 0 123456");
620        assert_eq!(lexer.next(), Some(Ok(Token::IntLit)));
621        assert_eq!(lexer.slice(), "42");
622        assert_eq!(lexer.next(), Some(Ok(Token::IntLit)));
623        assert_eq!(lexer.slice(), "-7");
624        assert_eq!(lexer.next(), Some(Ok(Token::IntLit)));
625        assert_eq!(lexer.slice(), "0");
626        assert_eq!(lexer.next(), Some(Ok(Token::IntLit)));
627        assert_eq!(lexer.slice(), "123456");
628        assert_eq!(lexer.next(), None);
629    }
630
631    #[test]
632    fn lex_float_literals() {
633        let mut lexer = Token::lexer("3.14 -0.5 0.0 123.456");
634        assert_eq!(lexer.next(), Some(Ok(Token::FloatLit)));
635        assert_eq!(lexer.slice(), "3.14");
636        assert_eq!(lexer.next(), Some(Ok(Token::FloatLit)));
637        assert_eq!(lexer.slice(), "-0.5");
638        assert_eq!(lexer.next(), Some(Ok(Token::FloatLit)));
639        assert_eq!(lexer.slice(), "0.0");
640        assert_eq!(lexer.next(), Some(Ok(Token::FloatLit)));
641        assert_eq!(lexer.slice(), "123.456");
642        assert_eq!(lexer.next(), None);
643    }
644
645    #[test]
646    fn lex_string_literals() {
647        let mut lexer = Token::lexer(r#""hello" "world" "with spaces""#);
648        assert_eq!(lexer.next(), Some(Ok(Token::StringLit)));
649        assert_eq!(lexer.slice(), r#""hello""#);
650        assert_eq!(lexer.next(), Some(Ok(Token::StringLit)));
651        assert_eq!(lexer.slice(), r#""world""#);
652        assert_eq!(lexer.next(), Some(Ok(Token::StringLit)));
653        assert_eq!(lexer.slice(), r#""with spaces""#);
654        assert_eq!(lexer.next(), None);
655    }
656
657    #[test]
658    fn lex_string_with_escapes() {
659        let mut lexer = Token::lexer(r#""hello\nworld" "tab\there" "quote\"here""#);
660        assert_eq!(lexer.next(), Some(Ok(Token::StringLit)));
661        assert_eq!(lexer.slice(), r#""hello\nworld""#);
662        assert_eq!(lexer.next(), Some(Ok(Token::StringLit)));
663        assert_eq!(lexer.slice(), r#""tab\there""#);
664        assert_eq!(lexer.next(), Some(Ok(Token::StringLit)));
665        assert_eq!(lexer.slice(), r#""quote\"here""#);
666        assert_eq!(lexer.next(), None);
667    }
668
669    #[test]
670    fn lex_identifiers() {
671        let mut lexer = Token::lexer("foo bar _private myAgent agent2");
672        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
673        assert_eq!(lexer.slice(), "foo");
674        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
675        assert_eq!(lexer.slice(), "bar");
676        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
677        assert_eq!(lexer.slice(), "_private");
678        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
679        assert_eq!(lexer.slice(), "myAgent");
680        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
681        assert_eq!(lexer.slice(), "agent2");
682        assert_eq!(lexer.next(), None);
683    }
684
685    #[test]
686    fn keyword_vs_identifier() {
687        // "agent" is a keyword, "agent_name" is an identifier
688        let mut lexer = Token::lexer("agent agent_name agents");
689        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
690        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
691        assert_eq!(lexer.slice(), "agent_name");
692        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
693        assert_eq!(lexer.slice(), "agents");
694        assert_eq!(lexer.next(), None);
695    }
696
697    #[test]
698    fn lex_punctuation() {
699        let mut lexer = Token::lexer("{ } ( ) [ ] , : . ->");
700        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
701        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
702        assert_eq!(lexer.next(), Some(Ok(Token::LParen)));
703        assert_eq!(lexer.next(), Some(Ok(Token::RParen)));
704        assert_eq!(lexer.next(), Some(Ok(Token::LBracket)));
705        assert_eq!(lexer.next(), Some(Ok(Token::RBracket)));
706        assert_eq!(lexer.next(), Some(Ok(Token::Comma)));
707        assert_eq!(lexer.next(), Some(Ok(Token::Colon)));
708        assert_eq!(lexer.next(), Some(Ok(Token::Dot)));
709        assert_eq!(lexer.next(), Some(Ok(Token::Arrow)));
710        assert_eq!(lexer.next(), None);
711    }
712
713    #[test]
714    fn lex_operators() {
715        let mut lexer = Token::lexer("= == != < > <= >= + - * / % ! && || ++");
716        assert_eq!(lexer.next(), Some(Ok(Token::Eq)));
717        assert_eq!(lexer.next(), Some(Ok(Token::EqEq)));
718        assert_eq!(lexer.next(), Some(Ok(Token::Ne)));
719        assert_eq!(lexer.next(), Some(Ok(Token::Lt)));
720        assert_eq!(lexer.next(), Some(Ok(Token::Gt)));
721        assert_eq!(lexer.next(), Some(Ok(Token::Le)));
722        assert_eq!(lexer.next(), Some(Ok(Token::Ge)));
723        assert_eq!(lexer.next(), Some(Ok(Token::Plus)));
724        assert_eq!(lexer.next(), Some(Ok(Token::Minus)));
725        assert_eq!(lexer.next(), Some(Ok(Token::Star)));
726        assert_eq!(lexer.next(), Some(Ok(Token::Slash)));
727        assert_eq!(lexer.next(), Some(Ok(Token::Percent)));
728        assert_eq!(lexer.next(), Some(Ok(Token::Bang)));
729        assert_eq!(lexer.next(), Some(Ok(Token::And)));
730        assert_eq!(lexer.next(), Some(Ok(Token::Or)));
731        assert_eq!(lexer.next(), Some(Ok(Token::PlusPlus)));
732        assert_eq!(lexer.next(), None);
733    }
734
735    #[test]
736    fn skip_whitespace() {
737        let mut lexer = Token::lexer("  agent   belief\n\ttrue  ");
738        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
739        assert_eq!(lexer.next(), Some(Ok(Token::KwBelief)));
740        assert_eq!(lexer.next(), Some(Ok(Token::KwTrue)));
741        assert_eq!(lexer.next(), None);
742    }
743
744    #[test]
745    fn skip_comments() {
746        let mut lexer = Token::lexer("agent // this is a comment\nbelief");
747        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
748        assert_eq!(lexer.next(), Some(Ok(Token::KwBelief)));
749        assert_eq!(lexer.next(), None);
750    }
751
752    #[test]
753    fn comment_at_end() {
754        let mut lexer = Token::lexer("agent // comment at end");
755        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
756        assert_eq!(lexer.next(), None);
757    }
758
759    #[test]
760    fn lex_agent_declaration() {
761        let source = r#"
762            agent Researcher {
763                belief topic: String
764
765                on start {
766                    let result: Inferred<String> = infer("test")
767                    emit(result)
768                }
769            }
770        "#;
771        let tokens: Vec<_> = Token::lexer(source)
772            .map(|r| r.expect("valid token"))
773            .collect();
774
775        assert_eq!(tokens[0], Token::KwAgent);
776        assert_eq!(tokens[1], Token::Ident); // Researcher
777        assert_eq!(tokens[2], Token::LBrace);
778        assert_eq!(tokens[3], Token::KwBelief);
779        assert_eq!(tokens[4], Token::Ident); // topic
780        assert_eq!(tokens[5], Token::Colon);
781        assert_eq!(tokens[6], Token::TyString);
782        assert_eq!(tokens[7], Token::KwOn);
783        assert_eq!(tokens[8], Token::KwStart);
784        assert_eq!(tokens[9], Token::LBrace);
785        assert_eq!(tokens[10], Token::KwLet);
786    }
787
788    #[test]
789    fn is_keyword_helper() {
790        assert!(Token::KwAgent.is_keyword());
791        assert!(Token::KwLet.is_keyword());
792        assert!(!Token::TyInt.is_keyword());
793        assert!(!Token::Ident.is_keyword());
794    }
795
796    #[test]
797    fn is_type_keyword_helper() {
798        assert!(Token::TyInt.is_type_keyword());
799        assert!(Token::TyAgent.is_type_keyword());
800        assert!(!Token::KwAgent.is_type_keyword());
801        assert!(!Token::Ident.is_type_keyword());
802    }
803
804    #[test]
805    fn is_literal_helper() {
806        assert!(Token::IntLit.is_literal());
807        assert!(Token::FloatLit.is_literal());
808        assert!(Token::StringLit.is_literal());
809        assert!(Token::KwTrue.is_literal());
810        assert!(Token::KwFalse.is_literal());
811        assert!(!Token::Ident.is_literal());
812    }
813
814    #[test]
815    fn is_operator_helper() {
816        assert!(Token::Plus.is_operator());
817        assert!(Token::EqEq.is_operator());
818        assert!(Token::PlusPlus.is_operator());
819        assert!(!Token::LBrace.is_operator());
820        assert!(!Token::Ident.is_operator());
821    }
822
823    #[test]
824    fn lex_module_keywords() {
825        let mut lexer = Token::lexer("mod use pub as super");
826        assert_eq!(lexer.next(), Some(Ok(Token::KwMod)));
827        assert_eq!(lexer.next(), Some(Ok(Token::KwUse)));
828        assert_eq!(lexer.next(), Some(Ok(Token::KwPub)));
829        assert_eq!(lexer.next(), Some(Ok(Token::KwAs)));
830        assert_eq!(lexer.next(), Some(Ok(Token::KwSuper)));
831        assert_eq!(lexer.next(), None);
832    }
833
834    #[test]
835    fn lex_path_separator() {
836        let mut lexer = Token::lexer("agents::Researcher");
837        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
838        assert_eq!(lexer.slice(), "agents");
839        assert_eq!(lexer.next(), Some(Ok(Token::ColonColon)));
840        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
841        assert_eq!(lexer.slice(), "Researcher");
842        assert_eq!(lexer.next(), None);
843    }
844
845    #[test]
846    fn lex_use_statement() {
847        let mut lexer = Token::lexer("use agents::{Researcher, Coordinator as Coord}");
848        assert_eq!(lexer.next(), Some(Ok(Token::KwUse)));
849        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // agents
850        assert_eq!(lexer.next(), Some(Ok(Token::ColonColon)));
851        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
852        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Researcher
853        assert_eq!(lexer.next(), Some(Ok(Token::Comma)));
854        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Coordinator
855        assert_eq!(lexer.next(), Some(Ok(Token::KwAs)));
856        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Coord
857        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
858        assert_eq!(lexer.next(), None);
859    }
860
861    #[test]
862    fn lex_pub_agent() {
863        let mut lexer = Token::lexer("pub agent Researcher");
864        assert_eq!(lexer.next(), Some(Ok(Token::KwPub)));
865        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
866        assert_eq!(lexer.next(), Some(Ok(Token::Ident)));
867        assert_eq!(lexer.next(), None);
868    }
869
870    #[test]
871    fn token_display() {
872        assert_eq!(format!("{}", Token::KwAgent), "agent");
873        assert_eq!(format!("{}", Token::TyInt), "Int");
874        assert_eq!(format!("{}", Token::IntLit), "<int>");
875        assert_eq!(format!("{}", Token::Ident), "<ident>");
876        assert_eq!(format!("{}", Token::LBrace), "{");
877        assert_eq!(format!("{}", Token::PlusPlus), "++");
878    }
879
880    #[test]
881    fn lex_type_keywords_record_enum_match_const() {
882        let mut lexer = Token::lexer("record enum match const");
883        assert_eq!(lexer.next(), Some(Ok(Token::KwRecord)));
884        assert_eq!(lexer.next(), Some(Ok(Token::KwEnum)));
885        assert_eq!(lexer.next(), Some(Ok(Token::KwMatch)));
886        assert_eq!(lexer.next(), Some(Ok(Token::KwConst)));
887        assert_eq!(lexer.next(), None);
888    }
889
890    #[test]
891    fn lex_fat_arrow() {
892        let mut lexer = Token::lexer("=> -> =");
893        assert_eq!(lexer.next(), Some(Ok(Token::FatArrow)));
894        assert_eq!(lexer.next(), Some(Ok(Token::Arrow)));
895        assert_eq!(lexer.next(), Some(Ok(Token::Eq)));
896        assert_eq!(lexer.next(), None);
897    }
898
899    #[test]
900    fn lex_match_expression() {
901        let mut lexer = Token::lexer("match status { Active => 1, Inactive => 0 }");
902        assert_eq!(lexer.next(), Some(Ok(Token::KwMatch)));
903        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // status
904        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
905        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Active
906        assert_eq!(lexer.next(), Some(Ok(Token::FatArrow)));
907        assert_eq!(lexer.next(), Some(Ok(Token::IntLit))); // 1
908        assert_eq!(lexer.next(), Some(Ok(Token::Comma)));
909        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Inactive
910        assert_eq!(lexer.next(), Some(Ok(Token::FatArrow)));
911        assert_eq!(lexer.next(), Some(Ok(Token::IntLit))); // 0
912        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
913        assert_eq!(lexer.next(), None);
914    }
915
916    #[test]
917    fn lex_record_declaration() {
918        let mut lexer = Token::lexer("record Point { x: Int, y: Int }");
919        assert_eq!(lexer.next(), Some(Ok(Token::KwRecord)));
920        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Point
921        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
922        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // x
923        assert_eq!(lexer.next(), Some(Ok(Token::Colon)));
924        assert_eq!(lexer.next(), Some(Ok(Token::TyInt)));
925        assert_eq!(lexer.next(), Some(Ok(Token::Comma)));
926        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // y
927        assert_eq!(lexer.next(), Some(Ok(Token::Colon)));
928        assert_eq!(lexer.next(), Some(Ok(Token::TyInt)));
929        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
930        assert_eq!(lexer.next(), None);
931    }
932
933    #[test]
934    fn lex_enum_declaration() {
935        let mut lexer = Token::lexer("enum Status { Active, Pending, Done }");
936        assert_eq!(lexer.next(), Some(Ok(Token::KwEnum)));
937        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Status
938        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
939        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Active
940        assert_eq!(lexer.next(), Some(Ok(Token::Comma)));
941        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Pending
942        assert_eq!(lexer.next(), Some(Ok(Token::Comma)));
943        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Done
944        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
945        assert_eq!(lexer.next(), None);
946    }
947
948    #[test]
949    fn lex_const_declaration() {
950        let mut lexer = Token::lexer("const MAX_RETRIES: Int = 3");
951        assert_eq!(lexer.next(), Some(Ok(Token::KwConst)));
952        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // MAX_RETRIES
953        assert_eq!(lexer.next(), Some(Ok(Token::Colon)));
954        assert_eq!(lexer.next(), Some(Ok(Token::TyInt)));
955        assert_eq!(lexer.next(), Some(Ok(Token::Eq)));
956        assert_eq!(lexer.next(), Some(Ok(Token::IntLit))); // 3
957        assert_eq!(lexer.next(), None);
958    }
959
960    #[test]
961    fn new_keywords_are_keywords() {
962        assert!(Token::KwRecord.is_keyword());
963        assert!(Token::KwEnum.is_keyword());
964        assert!(Token::KwMatch.is_keyword());
965        assert!(Token::KwConst.is_keyword());
966    }
967
968    #[test]
969    fn lex_loop_break() {
970        let mut lexer = Token::lexer("loop { break }");
971        assert_eq!(lexer.next(), Some(Ok(Token::KwLoop)));
972        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
973        assert_eq!(lexer.next(), Some(Ok(Token::KwBreak)));
974        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
975        assert_eq!(lexer.next(), None);
976    }
977
978    #[test]
979    fn lex_receives_receive() {
980        let mut lexer = Token::lexer("agent Worker receives WorkerMsg { receive }");
981        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
982        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Worker
983        assert_eq!(lexer.next(), Some(Ok(Token::KwReceives)));
984        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // WorkerMsg
985        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
986        assert_eq!(lexer.next(), Some(Ok(Token::KwReceive)));
987        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
988        assert_eq!(lexer.next(), None);
989    }
990
991    #[test]
992    fn rfc6_keywords_are_keywords() {
993        assert!(Token::KwLoop.is_keyword());
994        assert!(Token::KwBreak.is_keyword());
995        assert!(Token::KwReceives.is_keyword());
996        assert!(Token::KwReceive.is_keyword());
997    }
998
999    #[test]
1000    fn lex_error_handling_keywords() {
1001        let mut lexer = Token::lexer("fails try catch error");
1002        assert_eq!(lexer.next(), Some(Ok(Token::KwFails)));
1003        assert_eq!(lexer.next(), Some(Ok(Token::KwTry)));
1004        assert_eq!(lexer.next(), Some(Ok(Token::KwCatch)));
1005        assert_eq!(lexer.next(), Some(Ok(Token::KwError)));
1006        assert_eq!(lexer.next(), None);
1007    }
1008
1009    #[test]
1010    fn lex_try_catch_expression() {
1011        let mut lexer = Token::lexer("let x = try infer(prompt) catch { fallback }");
1012        assert_eq!(lexer.next(), Some(Ok(Token::KwLet)));
1013        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // x
1014        assert_eq!(lexer.next(), Some(Ok(Token::Eq)));
1015        assert_eq!(lexer.next(), Some(Ok(Token::KwTry)));
1016        assert_eq!(lexer.next(), Some(Ok(Token::KwInfer)));
1017        assert_eq!(lexer.next(), Some(Ok(Token::LParen)));
1018        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // prompt
1019        assert_eq!(lexer.next(), Some(Ok(Token::RParen)));
1020        assert_eq!(lexer.next(), Some(Ok(Token::KwCatch)));
1021        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
1022        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // fallback
1023        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
1024        assert_eq!(lexer.next(), None);
1025    }
1026
1027    #[test]
1028    fn lex_fails_function() {
1029        let mut lexer = Token::lexer("fn fetch(url: String) -> String fails { }");
1030        assert_eq!(lexer.next(), Some(Ok(Token::KwFn)));
1031        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // fetch
1032        assert_eq!(lexer.next(), Some(Ok(Token::LParen)));
1033        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // url
1034        assert_eq!(lexer.next(), Some(Ok(Token::Colon)));
1035        assert_eq!(lexer.next(), Some(Ok(Token::TyString)));
1036        assert_eq!(lexer.next(), Some(Ok(Token::RParen)));
1037        assert_eq!(lexer.next(), Some(Ok(Token::Arrow)));
1038        assert_eq!(lexer.next(), Some(Ok(Token::TyString)));
1039        assert_eq!(lexer.next(), Some(Ok(Token::KwFails)));
1040        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
1041        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
1042        assert_eq!(lexer.next(), None);
1043    }
1044
1045    #[test]
1046    fn lex_on_error_handler() {
1047        let mut lexer = Token::lexer("on error(e) { emit(fallback) }");
1048        assert_eq!(lexer.next(), Some(Ok(Token::KwOn)));
1049        assert_eq!(lexer.next(), Some(Ok(Token::KwError)));
1050        assert_eq!(lexer.next(), Some(Ok(Token::LParen)));
1051        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // e
1052        assert_eq!(lexer.next(), Some(Ok(Token::RParen)));
1053        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
1054        assert_eq!(lexer.next(), Some(Ok(Token::KwEmit)));
1055        assert_eq!(lexer.next(), Some(Ok(Token::LParen)));
1056        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // fallback
1057        assert_eq!(lexer.next(), Some(Ok(Token::RParen)));
1058        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
1059        assert_eq!(lexer.next(), None);
1060    }
1061
1062    #[test]
1063    fn rfc7_keywords_are_keywords() {
1064        assert!(Token::KwFail.is_keyword());
1065        assert!(Token::KwFails.is_keyword());
1066        assert!(Token::KwTry.is_keyword());
1067        assert!(Token::KwCatch.is_keyword());
1068        assert!(Token::KwError.is_keyword());
1069    }
1070
1071    #[test]
1072    fn lex_fail_expression() {
1073        let mut lexer = Token::lexer("fail \"error message\"");
1074        assert_eq!(lexer.next(), Some(Ok(Token::KwFail)));
1075        assert_eq!(lexer.next(), Some(Ok(Token::StringLit)));
1076        assert_eq!(lexer.next(), None);
1077    }
1078
1079    // =========================================================================
1080    // RFC-0009: Closures
1081    // =========================================================================
1082
1083    #[test]
1084    fn lex_closure_syntax() {
1085        // |x: Int| x + 1
1086        let mut lexer = Token::lexer("|x: Int| x + 1");
1087        assert_eq!(lexer.next(), Some(Ok(Token::Pipe)));
1088        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // x
1089        assert_eq!(lexer.next(), Some(Ok(Token::Colon)));
1090        assert_eq!(lexer.next(), Some(Ok(Token::TyInt)));
1091        assert_eq!(lexer.next(), Some(Ok(Token::Pipe)));
1092        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // x
1093        assert_eq!(lexer.next(), Some(Ok(Token::Plus)));
1094        assert_eq!(lexer.next(), Some(Ok(Token::IntLit)));
1095        assert_eq!(lexer.next(), None);
1096    }
1097
1098    #[test]
1099    fn lex_empty_closure() {
1100        // || 42
1101        let mut lexer = Token::lexer("|| 42");
1102        assert_eq!(lexer.next(), Some(Ok(Token::Or))); // || lexes as Or
1103        assert_eq!(lexer.next(), Some(Ok(Token::IntLit)));
1104        assert_eq!(lexer.next(), None);
1105    }
1106
1107    #[test]
1108    fn lex_fn_type() {
1109        // Fn(Int, String) -> Bool
1110        let mut lexer = Token::lexer("Fn(Int, String) -> Bool");
1111        assert_eq!(lexer.next(), Some(Ok(Token::TyFn)));
1112        assert_eq!(lexer.next(), Some(Ok(Token::LParen)));
1113        assert_eq!(lexer.next(), Some(Ok(Token::TyInt)));
1114        assert_eq!(lexer.next(), Some(Ok(Token::Comma)));
1115        assert_eq!(lexer.next(), Some(Ok(Token::TyString)));
1116        assert_eq!(lexer.next(), Some(Ok(Token::RParen)));
1117        assert_eq!(lexer.next(), Some(Ok(Token::Arrow)));
1118        assert_eq!(lexer.next(), Some(Ok(Token::TyBool)));
1119        assert_eq!(lexer.next(), None);
1120    }
1121
1122    #[test]
1123    fn fn_is_type_keyword() {
1124        assert!(Token::TyFn.is_type_keyword());
1125    }
1126
1127    #[test]
1128    fn pipe_display() {
1129        assert_eq!(format!("{}", Token::Pipe), "|");
1130        assert_eq!(format!("{}", Token::TyFn), "Fn");
1131    }
1132
1133    // =========================================================================
1134    // RFC-0011: Tool Support
1135    // =========================================================================
1136
1137    #[test]
1138    fn lex_tool_keyword() {
1139        let mut lexer = Token::lexer("tool Http { fn get(url: String) -> String }");
1140        assert_eq!(lexer.next(), Some(Ok(Token::KwTool)));
1141        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Http
1142        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
1143        assert_eq!(lexer.next(), Some(Ok(Token::KwFn)));
1144        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // get
1145        assert_eq!(lexer.next(), Some(Ok(Token::LParen)));
1146        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // url
1147        assert_eq!(lexer.next(), Some(Ok(Token::Colon)));
1148        assert_eq!(lexer.next(), Some(Ok(Token::TyString)));
1149        assert_eq!(lexer.next(), Some(Ok(Token::RParen)));
1150        assert_eq!(lexer.next(), Some(Ok(Token::Arrow)));
1151        assert_eq!(lexer.next(), Some(Ok(Token::TyString)));
1152        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
1153        assert_eq!(lexer.next(), None);
1154    }
1155
1156    #[test]
1157    fn tool_is_keyword() {
1158        assert!(Token::KwTool.is_keyword());
1159    }
1160
1161    #[test]
1162    fn lex_agent_use_tool() {
1163        let mut lexer = Token::lexer("agent Fetcher { use Http }");
1164        assert_eq!(lexer.next(), Some(Ok(Token::KwAgent)));
1165        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Fetcher
1166        assert_eq!(lexer.next(), Some(Ok(Token::LBrace)));
1167        assert_eq!(lexer.next(), Some(Ok(Token::KwUse)));
1168        assert_eq!(lexer.next(), Some(Ok(Token::Ident))); // Http
1169        assert_eq!(lexer.next(), Some(Ok(Token::RBrace)));
1170        assert_eq!(lexer.next(), None);
1171    }
1172}