// ternlang_core/lexer.rs

1use logos::Logos;
2
3#[derive(Logos, Debug, PartialEq, Clone)]
4#[logos(skip r"[ \t\n\f]+")] // Skip whitespace
5#[logos(skip(r"//[^\n]*", allow_greedy = true))]   // Skip line comments
6#[logos(skip(r"/\*[^*]*\*+(?:[^*/][^*]*\*+)*/", allow_greedy = true))]  // Skip block comments /* ... */
7pub enum Token {
8    #[regex(r"[0-9]+\.[0-9]+(?:[eE][+-]?[0-9]+)?", |lex| lex.slice().parse::<f64>().ok(), priority = 100)]
9    #[regex(r"[0-9]+[eE][+-]?[0-9]+", |lex| lex.slice().parse::<f64>().ok(), priority = 100)]
10    Float(f64),
11
12    // Ternary Specific
13    #[token("affirm", priority = 4)]
14    Affirm,
15
16    #[token("tend", priority = 4)]
17    #[token("hold", priority = 4)]
18    Tend,
19
20    #[token("reject", priority = 4)]
21    Reject,
22
23    #[token("?")]
24    UncertainBranch,
25
26    #[token("trit", priority = 3)]
27    TritType,
28
29    #[token("trittensor", priority = 3)]
30    TritTensor,
31
32    #[token("sparseskip", priority = 3)]
33    SparseSkip,
34
35    #[token("packed", priority = 3)]
36    Packed,
37
38
39    // Standard Keywords
40    #[token("if", priority = 3)]
41    If,
42
43    #[token("else", priority = 3)]
44    Else,
45
46    #[token("fn", priority = 3)]
47    Fn,
48
49    #[token("return", priority = 3)]
50    Return,
51
52    #[token("let", priority = 3)]
53    Let,
54
55    #[token("match", priority = 3)]
56    Match,
57
58    #[token("for", priority = 3)]
59    For,
60
61    #[token("in", priority = 3)]
62    In,
63
64    #[token("while", priority = 3)]
65    While,
66
67    #[token("loop", priority = 3)]
68    Loop,
69
70    #[token("break", priority = 3)]
71    Break,
72
73    #[token("continue", priority = 3)]
74    Continue,
75
76    #[token("mut", priority = 3)]
77    Mut,
78
79    #[token("use", priority = 3)]
80    Use,
81
82    #[token("from", priority = 3)]
83    From,
84
85    #[token("import", priority = 3)]
86    Import,
87
88    #[token("module", priority = 3)]
89    Module,
90
91    #[token("pub", priority = 3)]
92    Pub,
93
94    #[token("struct", priority = 3)]
95    Struct,
96
97    // Actor model keywords
98    #[token("agent", priority = 3)]
99    Agent,
100
101    #[token("spawn", priority = 3)]
102    Spawn,
103
104    #[token("send", priority = 3)]
105    Send,
106
107    #[token("await", priority = 3)]
108    Await,
109
110    #[token("agentref", priority = 3)]
111    AgentRef,
112
113    #[token("remote", priority = 3)]
114    Remote,
115
116    #[token("nodeid", priority = 3)]
117    NodeId,
118
119    // Operators
120    #[token("+")]
121    Plus,
122
123    #[token("-")]
124    Minus,
125
126    #[token("*")]
127    Star,
128
129    #[token("/")]
130    Slash,
131
132    #[token("%")]
133    Percent,
134
135    #[token("=")]
136    Assign,
137
138    #[token("==")]
139    Equal,
140
141    #[token("=>")]
142    FatArrow,
143
144    #[token("!=")]
145    NotEqual,
146
147    #[token("::")]
148    DoubleColon,
149
150    #[token("&&")]
151    And,
152
153    #[token("||")]
154    Or,
155
156    // Delimiters
157    #[token("(", priority = 3)]
158    LParen,
159
160    #[token(")", priority = 3)]
161    RParen,
162
163    #[token("{", priority = 3)]
164    LBrace,
165
166    #[token("}", priority = 3)]
167    RBrace,
168
169    #[token("[", priority = 3)]
170    LBracket,
171
172    #[token("]", priority = 3)]
173    RBracket,
174
175    #[token("<", priority = 3)]
176    LAngle,
177
178    #[token(">", priority = 3)]
179    RAngle,
180
181    #[token("<=", priority = 4)]
182    LessEqual,
183
184    #[token(">=", priority = 4)]
185    GreaterEqual,
186
187    #[token(",", priority = 3)]
188    Comma,
189
190    #[token(";", priority = 3)]
191    Semicolon,
192
193    #[token(":")]
194    Colon,
195
196    #[token("@")]
197    At,
198
199    #[token("->")]
200    Arrow,
201
202    #[token("..")]
203    Range,
204
205    #[token(".")]
206    Dot,
207
208    // Literals
209    #[regex("[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string(), priority = 1)]
210    Ident(String),
211
212    // PARSER-LIT-001: hex (0xFF) and binary (0b1010) integer literals.
213    // Must have higher priority than the plain decimal rule so the '0' prefix is
214    // consumed as part of the full literal rather than tokenised as Int(0) + Ident.
215    #[regex(r"0[xX][0-9a-fA-F]+", |lex| i64::from_str_radix(&lex.slice()[2..], 16).ok(), priority = 20)]
216    #[regex(r"0[bB][01]+", |lex| i64::from_str_radix(&lex.slice()[2..], 2).ok(), priority = 20)]
217    #[regex("[0-9]+", |lex| lex.slice().parse::<i64>().ok(), priority = 10)]
218    Int(i64),
219
220    /// Double-quoted string literal: "value"
221    #[regex(r#""[^"]*""#, |lex| {
222        let s = lex.slice();
223        Some(s[1..s.len()-1].to_string())
224    }, priority = 2)]
225    StringLit(String),
226
227    // Unused but kept for variant compatibility
228    #[token("___TRIT_LITERAL_UNUSED___")]
229    TritLiteral,
230}
231
#[cfg(test)]
mod tests {
    use super::*;

    /// Each scientific-notation spelling must lex as exactly one `Float`
    /// token carrying the expected value.
    #[test]
    fn test_float_scientific_notation() {
        let cases: &[(&str, f64)] = &[
            ("1.23e-10",  1.23e-10),
            ("1.23E+10",  1.23e10),
            ("9.5e2",     9.5e2),
            ("1e5",       1e5),
            ("2E-3",      2e-3),
        ];
        for &(input, expected) in cases {
            let mut lex = Token::lexer(input);
            match lex.next() {
                Some(Ok(Token::Float(v))) => {
                    // Compare by relative error; the EPSILON floor guards the division.
                    let rel = (v - expected).abs() / expected.abs().max(f64::EPSILON);
                    assert!(rel < 1e-12, "input={input} got={v} expected={expected}");
                }
                other => panic!("input={input}: expected Float, got {other:?}"),
            }
            // The whole input must have been consumed by that single token.
            assert_eq!(lex.next(), None, "input={input}: unexpected trailing token");
        }
    }

    /// A small statement sequence must lex into exactly the expected token
    /// stream, with nothing left over.
    #[test]
    fn test_lexer() {
        let input = "let x: trit = 1; if x ? { return 0; }";
        let expected = [
            Token::Let,
            Token::Ident("x".to_string()),
            Token::Colon,
            Token::TritType,
            Token::Assign,
            Token::Int(1),
            Token::Semicolon,
            Token::If,
            Token::Ident("x".to_string()),
            Token::UncertainBranch,
            Token::LBrace,
            Token::Return,
            Token::Int(0),
            Token::Semicolon,
            Token::RBrace,
        ];

        let mut lex = Token::lexer(input);
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(lex.next(), Some(Ok(want.clone())), "token #{idx}");
        }
        // No tokens may remain once the expected stream has been read.
        assert_eq!(lex.next(), None);
    }
}