ternlang_core/lexer.rs

use logos::Logos;

#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t\n\f]+")] // Skip whitespace
#[logos(skip r"//[^\n]*")] // Skip line comments
#[logos(skip r"/\*[^*]*\*+(?:[^*/][^*]*\*+)*/")] // Skip block comments /* ... */
pub enum Token {
    #[regex(r"[0-9]+\.[0-9]+", |lex| lex.slice().parse::<f64>().ok(), priority = 100)]
    Float(f64),

    // Ternary-specific keywords
    #[token("affirm", priority = 4)]
    Affirm,

    #[token("tend", priority = 4)]
    #[token("hold", priority = 4)]
    Tend,

    #[token("reject", priority = 4)]
    Reject,

    #[token("?")]
    UncertainBranch,

    #[token("trit", priority = 3)]
    TritType,

    #[token("trittensor", priority = 3)]
    TritTensor,

    #[token("sparseskip", priority = 3)]
    SparseSkip,

    #[token("packed", priority = 3)]
    Packed,

    // Standard Keywords
    #[token("if", priority = 3)]
    If,

    #[token("else", priority = 3)]
    Else,

    #[token("fn", priority = 3)]
    Fn,

    #[token("return", priority = 3)]
    Return,

    #[token("let", priority = 3)]
    Let,

    #[token("match", priority = 3)]
    Match,

    #[token("for", priority = 3)]
    For,

    #[token("in", priority = 3)]
    In,

    #[token("while", priority = 3)]
    While,

    #[token("loop", priority = 3)]
    Loop,

    #[token("break", priority = 3)]
    Break,

    #[token("continue", priority = 3)]
    Continue,

    #[token("mut", priority = 3)]
    Mut,

    #[token("use", priority = 3)]
    Use,

    #[token("from", priority = 3)]
    From,

    #[token("import", priority = 3)]
    Import,

    #[token("module", priority = 3)]
    Module,

    #[token("pub", priority = 3)]
    Pub,

    #[token("struct", priority = 3)]
    Struct,

    // Actor model keywords
    #[token("agent", priority = 3)]
    Agent,

    #[token("spawn", priority = 3)]
    Spawn,

    #[token("send", priority = 3)]
    Send,

    #[token("await", priority = 3)]
    Await,

    #[token("agentref", priority = 3)]
    AgentRef,

    #[token("remote", priority = 3)]
    Remote,

    #[token("nodeid", priority = 3)]
    NodeId,

    // Operators
    #[token("+")]
    Plus,

    #[token("-")]
    Minus,

    #[token("*")]
    Star,

    #[token("/")]
    Slash,

    #[token("%")]
    Percent,

    #[token("=")]
    Assign,

    #[token("==")]
    Equal,

    #[token("=>")]
    FatArrow,

    #[token("!=")]
    NotEqual,

    #[token("::")]
    DoubleColon,

    #[token("&&")]
    And,

    #[token("||")]
    Or,

    // Delimiters
    #[token("(", priority = 3)]
    LParen,

    #[token(")", priority = 3)]
    RParen,

    #[token("{", priority = 3)]
    LBrace,

    #[token("}", priority = 3)]
    RBrace,

    #[token("[", priority = 3)]
    LBracket,

    #[token("]", priority = 3)]
    RBracket,

    #[token("<", priority = 3)]
    LAngle,

    #[token(">", priority = 3)]
    RAngle,

    #[token("<=", priority = 4)]
    LessEqual,

    #[token(">=", priority = 4)]
    GreaterEqual,

    #[token(",", priority = 3)]
    Comma,

    #[token(";", priority = 3)]
    Semicolon,

    #[token(":")]
    Colon,

    #[token("@")]
    At,

    #[token("->")]
    Arrow,

    #[token("..")]
    Range,

    #[token(".")]
    Dot,

    // Literals
    #[regex("[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string(), priority = 1)]
    Ident(String),

    // PARSER-LIT-001: hex (0xFF) and binary (0b1010) integer literals.
    // These must have higher priority than the plain decimal rule so the '0' prefix is
    // consumed as part of the full literal rather than tokenised as Int(0) + Ident.
    #[regex(r"0[xX][0-9a-fA-F]+", |lex| i64::from_str_radix(&lex.slice()[2..], 16).ok(), priority = 20)]
    #[regex(r"0[bB][01]+", |lex| i64::from_str_radix(&lex.slice()[2..], 2).ok(), priority = 20)]
    #[regex("[0-9]+", |lex| lex.slice().parse::<i64>().ok(), priority = 10)]
    Int(i64),

    /// Double-quoted string literal: "value" (no escape-sequence handling).
    #[regex(r#""[^"]*""#, |lex| {
        let s = lex.slice();
        Some(s[1..s.len() - 1].to_string())
    }, priority = 2)]
    StringLit(String),

    // Unused but kept for variant compatibility
    #[token("___TRIT_LITERAL_UNUSED___")]
    TritLiteral,
}
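// A minimal usage sketch, not part of the lexer proper: the `tokenize` helper
// below is an assumed convenience wrapper showing how a parser driver would
// typically consume this lexer through logos' `spanned()` iterator.
#[allow(dead_code)]
pub fn tokenize(source: &str) -> Vec<(Result<Token, ()>, std::ops::Range<usize>)> {
    // Each item pairs the token (or a lexing error) with its byte range in `source`.
    Token::lexer(source).spanned().collect()
}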
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_lexer() {
        let input = "let x: trit = 1; if x ? { return 0; }";
        let mut lex = Token::lexer(input);

        assert_eq!(lex.next(), Some(Ok(Token::Let)));
        assert_eq!(lex.next(), Some(Ok(Token::Ident("x".to_string()))));
        assert_eq!(lex.next(), Some(Ok(Token::Colon)));
        assert_eq!(lex.next(), Some(Ok(Token::TritType)));
        assert_eq!(lex.next(), Some(Ok(Token::Assign)));
        assert_eq!(lex.next(), Some(Ok(Token::Int(1))));
        assert_eq!(lex.next(), Some(Ok(Token::Semicolon)));
        assert_eq!(lex.next(), Some(Ok(Token::If)));
        assert_eq!(lex.next(), Some(Ok(Token::Ident("x".to_string()))));
        assert_eq!(lex.next(), Some(Ok(Token::UncertainBranch)));
        assert_eq!(lex.next(), Some(Ok(Token::LBrace)));
        assert_eq!(lex.next(), Some(Ok(Token::Return)));
        assert_eq!(lex.next(), Some(Ok(Token::Int(0))));
        assert_eq!(lex.next(), Some(Ok(Token::Semicolon)));
        assert_eq!(lex.next(), Some(Ok(Token::RBrace)));
    }
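    // A hedged sketch of additional checks, not in the original suite: it
    // exercises the PARSER-LIT-001 hex/binary rules, comment skipping, and the
    // `hold` alias for `tend`. The input strings are illustrative assumptions.
    #[test]
    fn test_literals_comments_and_aliases() {
        // 0xFF must lex as one Int(255), not Int(0) followed by Ident("xFF");
        // block and line comments should disappear entirely.
        let input = "0xFF 0b1010 /* block */ 42 // trailing";
        let mut lex = Token::lexer(input);

        assert_eq!(lex.next(), Some(Ok(Token::Int(255))));
        assert_eq!(lex.next(), Some(Ok(Token::Int(10))));
        assert_eq!(lex.next(), Some(Ok(Token::Int(42))));
        assert_eq!(lex.next(), None);

        // `tend` and `hold` both map to the same Tend variant.
        let mut lex = Token::lexer("tend hold");
        assert_eq!(lex.next(), Some(Ok(Token::Tend)));
        assert_eq!(lex.next(), Some(Ok(Token::Tend)));
        assert_eq!(lex.next(), None);
    }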
}