Skip to main content

ternlang_core/
lexer.rs

1use logos::Logos;
2
3#[derive(Logos, Debug, PartialEq, Clone)]
4#[logos(skip r"[ \t\n\f]+")] // Skip whitespace
5#[logos(skip(r"//[^\n]*", allow_greedy = true))]   // Skip line comments
6#[logos(skip(r"/\*[^*]*\*+(?:[^*/][^*]*\*+)*/", allow_greedy = true))]  // Skip block comments /* ... */
7pub enum Token {
8    #[regex(r"[0-9]+\.[0-9]+", |lex| lex.slice().parse::<f64>().ok(), priority = 100)]
9    Float(f64),
10
11    // Ternary Specific
12    #[token("affirm", priority = 4)]
13    Affirm,
14
15    #[token("tend", priority = 4)]
16    #[token("hold", priority = 4)]
17    Tend,
18
19    #[token("reject", priority = 4)]
20    Reject,
21
22    #[token("?")]
23    UncertainBranch,
24
25    #[token("trit", priority = 3)]
26    TritType,
27
28    #[token("trittensor", priority = 3)]
29    TritTensor,
30
31    #[token("sparseskip", priority = 3)]
32    SparseSkip,
33
34
35    // Standard Keywords
36    #[token("if", priority = 3)]
37    If,
38
39    #[token("else", priority = 3)]
40    Else,
41
42    #[token("fn", priority = 3)]
43    Fn,
44
45    #[token("return", priority = 3)]
46    Return,
47
48    #[token("let", priority = 3)]
49    Let,
50
51    #[token("match", priority = 3)]
52    Match,
53
54    #[token("for", priority = 3)]
55    For,
56
57    #[token("in", priority = 3)]
58    In,
59
60    #[token("while", priority = 3)]
61    While,
62
63    #[token("loop", priority = 3)]
64    Loop,
65
66    #[token("break", priority = 3)]
67    Break,
68
69    #[token("continue", priority = 3)]
70    Continue,
71
72    #[token("mut", priority = 3)]
73    Mut,
74
75    #[token("use", priority = 3)]
76    Use,
77
78    #[token("from", priority = 3)]
79    From,
80
81    #[token("import", priority = 3)]
82    Import,
83
84    #[token("module", priority = 3)]
85    Module,
86
87    #[token("pub", priority = 3)]
88    Pub,
89
90    #[token("struct", priority = 3)]
91    Struct,
92
93    // Actor model keywords
94    #[token("agent", priority = 3)]
95    Agent,
96
97    #[token("spawn", priority = 3)]
98    Spawn,
99
100    #[token("send", priority = 3)]
101    Send,
102
103    #[token("await", priority = 3)]
104    Await,
105
106    #[token("agentref", priority = 3)]
107    AgentRef,
108
109    #[token("remote", priority = 3)]
110    Remote,
111
112    #[token("nodeid", priority = 3)]
113    NodeId,
114
115    // Operators
116    #[token("+")]
117    Plus,
118
119    #[token("-")]
120    Minus,
121
122    #[token("*")]
123    Star,
124
125    #[token("/")]
126    Slash,
127
128    #[token("%")]
129    Percent,
130
131    #[token("=")]
132    Assign,
133
134    #[token("==")]
135    Equal,
136
137    #[token("=>")]
138    FatArrow,
139
140    #[token("!=")]
141    NotEqual,
142
143    #[token("::")]
144    DoubleColon,
145
146    #[token("&&")]
147    And,
148
149    #[token("||")]
150    Or,
151
152    // Delimiters
153    #[token("(", priority = 3)]
154    LParen,
155
156    #[token(")", priority = 3)]
157    RParen,
158
159    #[token("{", priority = 3)]
160    LBrace,
161
162    #[token("}", priority = 3)]
163    RBrace,
164
165    #[token("[", priority = 3)]
166    LBracket,
167
168    #[token("]", priority = 3)]
169    RBracket,
170
171    #[token("<", priority = 3)]
172    LAngle,
173
174    #[token(">", priority = 3)]
175    RAngle,
176
177    #[token("<=", priority = 4)]
178    LessEqual,
179
180    #[token(">=", priority = 4)]
181    GreaterEqual,
182
183    #[token(",", priority = 3)]
184    Comma,
185
186    #[token(";", priority = 3)]
187    Semicolon,
188
189    #[token(":")]
190    Colon,
191
192    #[token("@")]
193    At,
194
195    #[token("->")]
196    Arrow,
197
198    #[token(".")]
199    Dot,
200
201    // Literals
202    #[regex("[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string(), priority = 1)]
203    Ident(String),
204
205    // PARSER-LIT-001: hex (0xFF) and binary (0b1010) integer literals.
206    // Must have higher priority than the plain decimal rule so the '0' prefix is
207    // consumed as part of the full literal rather than tokenised as Int(0) + Ident.
208    #[regex(r"0[xX][0-9a-fA-F]+", |lex| i64::from_str_radix(&lex.slice()[2..], 16).ok(), priority = 20)]
209    #[regex(r"0[bB][01]+", |lex| i64::from_str_radix(&lex.slice()[2..], 2).ok(), priority = 20)]
210    #[regex("[0-9]+", |lex| lex.slice().parse::<i64>().ok(), priority = 10)]
211    Int(i64),
212
213    /// Double-quoted string literal: "value"
214    #[regex(r#""[^"]*""#, |lex| {
215        let s = lex.slice();
216        Some(s[1..s.len()-1].to_string())
217    }, priority = 2)]
218    StringLit(String),
219
220    // Unused but kept for variant compatibility
221    #[token("___TRIT_LITERAL_UNUSED___")]
222    TritLiteral,
223}
224
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes a short snippet mixing keywords, identifiers, punctuation and
    /// integer literals, and checks the exact token sequence it produces.
    #[test]
    fn test_lexer() {
        let input = "let x: trit = 1; if x ? { return 0; }";
        let expected = [
            Token::Let,
            Token::Ident("x".to_string()),
            Token::Colon,
            Token::TritType,
            Token::Assign,
            Token::Int(1),
            Token::Semicolon,
            Token::If,
            Token::Ident("x".to_string()),
            Token::UncertainBranch,
            Token::LBrace,
            Token::Return,
            Token::Int(0),
            Token::Semicolon,
            Token::RBrace,
        ];

        let mut lex = Token::lexer(input);
        for token in expected {
            assert_eq!(lex.next(), Some(Ok(token)));
        }

        // The whole input must be consumed: any trailing token or lexer error
        // after the closing brace is a failure.
        assert_eq!(lex.next(), None);
    }
}