// hexput_ast_api/lexer.rs

1use logos::{Logos, Lexer, Span};
2use crate::ast_structs::SourceLocation;
3
// Token definitions for the hexput language.
//
// Whitespace (space, tab, newline, form feed) is skipped; input that matches
// no rule produces a `TokenError` (see the `error = TokenError` attribute).
//
// NOTE(review): there is no `-` (minus) token and no `!=` token. A bare `-`
// that does not prefix a number literal therefore fails to lex (and is then
// silently dropped by `tokenize`). Confirm subtraction/inequality are
// intentionally unsupported.
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t\n\f]+", error = TokenError)]
pub enum Token {
    // Keywords
    #[token("vl")]
    Vl,

    #[token("if")]
    If,

    #[token("else")]
    Else,

    #[token("cb")]
    Cb,

    #[token("res")]
    Res,

    #[token("loop")]
    Loop,

    #[token("in")]
    In,

    #[token("end")]
    End,

    #[token("continue")]
    Continue,

    #[token("keysof")]
    KeysOf,

    // Boolean and null literals
    #[token("true")]
    True,

    #[token("false")]
    False,

    #[token("null")]
    Null,

    // Identifiers
    // Explicit priority so keyword tokens (`if`, `loop`, ...) win over this
    // general pattern — presumably; confirm against logos' priority rules.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_owned(), priority = 2)]
    Identifier(String),

    // Comments
    // `logos::skip` discards the match, so this variant is never actually
    // emitted by the lexer.
    #[regex(r"//[^\n]*", logos::skip)]
    Comment,

    // Literals
    // Quoted string with backslash escapes; `string_literal` strips the
    // quotes and resolves the escape sequences.
    #[regex(r#""([^"\\]|\\.)*""#, string_literal)]
    StringLiteral(String),

    // Integer or decimal number; an optional leading `-` is part of the
    // literal itself (there is no separate minus operator token).
    #[regex(r"-?[0-9]+(\.[0-9]+)?", |lex| lex.slice().parse::<f64>().ok())]
    NumberLiteral(f64),

    // Operators
    #[token("!")]
    Bang,

    #[token("=")]
    Equal,

    #[token("==")]
    EqualEqual,

    #[token("+")]
    Plus,

    #[token("*")]
    Multiply,

    // NOTE(review): lowered priority, presumably so `//` lexes as a comment
    // rather than two `/` tokens — confirm this is still required.
    #[token("/", priority = 1)]
    Divide,

    // Logical operators
    #[token("&&")]
    And,

    #[token("||")]
    Or,

    // Comparators
    #[token(">=")]
    GreaterEqual,

    #[token("<=")]
    LessEqual,

    #[token(">")]
    Greater,

    #[token("<")]
    Less,

    // Delimiters
    #[token("{")]
    OpenBrace,

    #[token("}")]
    CloseBrace,

    #[token("(")]
    OpenParen,

    #[token(")")]
    CloseParen,

    #[token(",")]
    Comma,

    #[token(";")]
    Semicolon,

    // Object and array tokens
    #[token("[")]
    OpenBracket,

    #[token("]")]
    CloseBracket,

    #[token(":")]
    Colon,

    #[token(".")]
    Dot,
}
134
/// Unit error type reported by the lexer when the input matches no token
/// rule. Carries no position information; `tokenize` currently discards
/// these errors entirely.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct TokenError;
137
138fn string_literal(lex: &mut Lexer<Token>) -> Option<String> {
139    let slice = lex.slice();
140    
141    let content = &slice[1..slice.len() - 1];
142    
143    let mut processed = String::new();
144    let mut chars = content.chars();
145    
146    while let Some(c) = chars.next() {
147        if c == '\\' {
148            if let Some(next) = chars.next() {
149                match next {
150                    'n' => processed.push('\n'),
151                    't' => processed.push('\t'),
152                    'r' => processed.push('\r'),
153                    '\\' => processed.push('\\'),
154                    '"' => processed.push('"'),
155                    _ => {
156                        // Invalid escape sequence
157                        processed.push('\\');
158                        processed.push(next);
159                    }
160                }
161            }
162        } else {
163            processed.push(c);
164        }
165    }
166    
167    Some(processed)
168}
169
/// A lexed token paired with its byte span in the original source text.
pub struct TokenWithSpan {
    pub token: Token,
    /// Byte range (`start..end`) of this token within the source string.
    pub span: Span,
}

impl TokenWithSpan {
    /// Resolves this token's byte span into a `SourceLocation` by delegating
    /// to `SourceLocation::from_spans` with the span's start/end offsets.
    pub fn get_location(&self, source_code: &str) -> SourceLocation {
        SourceLocation::from_spans(source_code, self.span.start, self.span.end)
    }
}
180
181pub fn tokenize(source: &str) -> Vec<TokenWithSpan> {
182    let mut lexer = Token::lexer(source);
183    let mut tokens = Vec::new();
184    
185    while let Some(token) = lexer.next() {
186        if let Ok(token) = token {
187            tokens.push(TokenWithSpan {
188                token: token.clone(),
189                span: lexer.span(),
190            });
191        }
192    }
193    
194    tokens
195}