//! Lexer for the Hexput language (hexput_ast_api/lexer.rs).

1use logos::{Logos, Lexer, Span};
2use crate::ast_structs::SourceLocation;
3
/// All lexical tokens of the Hexput language.
///
/// Whitespace (space, tab, newline, form feed) is skipped between tokens;
/// unmatched input produces a `TokenError`. Logos prefers the longest match,
/// so e.g. `==` lexes as `EqualEqual`, never as two `Equal` tokens, and
/// keywords beat the identifier regex via its lowered `priority = 2`.
///
/// NOTE(review): there is no `-` (minus) or `!=` token here; `-` is only
/// consumed as the sign of a `NumberLiteral`. Presumably intentional for this
/// language — confirm against the parser.
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t\n\f]+", error = TokenError)]
pub enum Token {
    // Keywords
    #[token("vl")]
    Vl,
    
    #[token("if")]
    If,
    
    #[token("else")]
    Else,
    
    #[token("cb")]
    Cb,
    
    #[token("res")]
    Res,
    
    #[token("loop")]
    Loop,
    
    #[token("in")]
    In,
    
    #[token("end")]
    End,
    
    #[token("continue")]
    Continue,
    
    #[token("keysof")]
    KeysOf,
    
    // Boolean and null literals
    #[token("true")]
    True,
    
    #[token("false")]
    False,
    
    #[token("null")]
    Null,
    
    // Identifiers
    // priority = 2 lets the fixed keyword tokens above win over this regex.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_owned(), priority = 2)]
    Identifier(String),

    // Comments
    // `logos::skip` means this variant is never actually emitted; `//` line
    // comments are discarded during lexing.
    #[regex(r"//[^\n]*", logos::skip)]
    Comment,
    
    // Literals
    // Double-quoted string; escape sequences are decoded by `string_literal`.
    #[regex(r#""([^"\\]|\\.)*""#, string_literal)]
    StringLiteral(String),
    
    // Optionally signed decimal number, parsed as f64. The callback returns
    // None (a lex error) if the parse fails.
    #[regex(r"-?[0-9]+(\.[0-9]+)?", |lex| lex.slice().parse::<f64>().ok())]
    NumberLiteral(f64),
    
    // Operators
    #[token("!")]
    Bang,
    
    #[token("=")]
    Equal,
    
    #[token("==")]
    EqualEqual,
    
    #[token("+")]
    Plus,
    
    #[token("*")]
    Multiply,
    
    // Lowered priority so the `//` comment regex wins on comment lines.
    #[token("/", priority = 1)]
    Divide,
    
    // Comparators
    #[token(">=")]
    GreaterEqual,
    
    #[token("<=")]
    LessEqual,
    
    #[token(">")]
    Greater,
    
    #[token("<")]
    Less,
    
    // Delimiters
    #[token("{")]
    OpenBrace,
    
    #[token("}")]
    CloseBrace,
    
    #[token("(")]
    OpenParen,
    
    #[token(")")]
    CloseParen,
    
    #[token(",")]
    Comma,
    
    #[token(";")]
    Semicolon,

    // Object and array tokens
    #[token("[")]
    OpenBracket,
    
    #[token("]")]
    CloseBracket,
    
    #[token(":")]
    Colon,

    #[token(".")]
    Dot,
}
127
/// Error value produced when the lexer encounters input that matches no
/// token rule (wired in via `#[logos(error = TokenError)]` on `Token`).
/// Carries no payload; `tokenize` currently discards these errors.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct TokenError;
130
131fn string_literal(lex: &mut Lexer<Token>) -> Option<String> {
132    let slice = lex.slice();
133    
134    let content = &slice[1..slice.len() - 1];
135    
136    let mut processed = String::new();
137    let mut chars = content.chars();
138    
139    while let Some(c) = chars.next() {
140        if c == '\\' {
141            if let Some(next) = chars.next() {
142                match next {
143                    'n' => processed.push('\n'),
144                    't' => processed.push('\t'),
145                    'r' => processed.push('\r'),
146                    '\\' => processed.push('\\'),
147                    '"' => processed.push('"'),
148                    _ => {
149                        // Invalid escape sequence
150                        processed.push('\\');
151                        processed.push(next);
152                    }
153                }
154            }
155        } else {
156            processed.push(c);
157        }
158    }
159    
160    Some(processed)
161}
162
163pub struct TokenWithSpan {
164    pub token: Token,
165    pub span: Span,
166}
167
impl TokenWithSpan {
    /// Resolves this token's byte span into a `SourceLocation` (line/column
    /// style position) by delegating to `SourceLocation::from_spans` with the
    /// original source text and the span's start/end byte offsets.
    pub fn get_location(&self, source_code: &str) -> SourceLocation {
        SourceLocation::from_spans(source_code, self.span.start, self.span.end)
    }
}
173
174pub fn tokenize(source: &str) -> Vec<TokenWithSpan> {
175    let mut lexer = Token::lexer(source);
176    let mut tokens = Vec::new();
177    
178    while let Some(token) = lexer.next() {
179        if let Ok(token) = token {
180            tokens.push(TokenWithSpan {
181                token: token.clone(),
182                span: lexer.span(),
183            });
184        }
185    }
186    
187    tokens
188}