//! `hexput_ast_api/lexer.rs` — token definitions and the lexer entry point.
1use logos::{Logos, Lexer, Span};
2use crate::ast_structs::SourceLocation;
3
/// All lexical tokens of the language, produced by the [`logos`] derive.
///
/// Whitespace (`space`, tab, newline, form feed) is skipped via the
/// `#[logos(skip ...)]` attribute, and unmatched input is reported as
/// [`TokenError`] (see the `error = TokenError` attribute).
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t\n\f]+", error = TokenError)]
pub enum Token {
    // Keywords
    #[token("vl")]
    Vl,

    #[token("if")]
    If,

    #[token("else")]
    Else,

    #[token("cb")]
    Cb,

    #[token("res")]
    Res,

    #[token("loop")]
    Loop,

    #[token("in")]
    In,

    #[token("end")]
    End,

    #[token("continue")]
    Continue,

    #[token("keysof")]
    KeysOf,

    // Boolean and null literals
    #[token("true")]
    True,

    #[token("false")]
    False,

    #[token("null")]
    Null,

    // Identifiers. `priority = 2` is explicit so keyword `#[token]` rules
    // (which also match this regex's pattern) take precedence predictably.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_owned(), priority = 2)]
    Identifier(String),

    // Comments: line comments are consumed and skipped entirely, so this
    // variant never appears in the token stream handed to the parser.
    #[regex(r"//[^\n]*", logos::skip)]
    Comment,

    // Literals. String contents are unescaped by the `string_literal` callback.
    #[regex(r#""([^"\\]|\\.)*""#, string_literal)]
    StringLiteral(String),

    // NOTE(review): the optional leading `-` means logos' longest-match rule
    // lexes `a-1` as `Identifier("a"), NumberLiteral(-1.0)` rather than
    // `Identifier, Minus, NumberLiteral` — confirm the parser expects this.
    #[regex(r"-?[0-9]+(\.[0-9]+)?", |lex| lex.slice().parse::<f64>().ok())]
    NumberLiteral(f64),

    // Operators
    #[token("!")]
    Bang,

    #[token("=")]
    Equal,

    #[token("==")]
    EqualEqual,

    #[token("!=")]
    NotEqual,

    #[token("+")]
    Plus,

    #[token("-")]
    Minus,

    #[token("*")]
    Multiply,

    // `priority = 1` disambiguates `/` from the `//...` comment regex prefix.
    #[token("/", priority = 1)]
    Divide,

    // Logical operators
    #[token("&&")]
    And,

    #[token("||")]
    Or,

    // Comparators
    #[token(">=")]
    GreaterEqual,

    #[token("<=")]
    LessEqual,

    #[token(">")]
    Greater,

    #[token("<")]
    Less,

    // Delimiters
    #[token("{")]
    OpenBrace,

    #[token("}")]
    CloseBrace,

    #[token("(")]
    OpenParen,

    #[token(")")]
    CloseParen,

    #[token(",")]
    Comma,

    #[token(";")]
    Semicolon,

    // Object and array tokens
    #[token("[")]
    OpenBracket,

    #[token("]")]
    CloseBracket,

    #[token(":")]
    Colon,

    #[token(".")]
    Dot,
}
140
/// Unit error type for the lexer, wired in via the `error = TokenError`
/// attribute on [`Token`]; produced when no token rule matches the input.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct TokenError;
143
144fn string_literal(lex: &mut Lexer<Token>) -> Option<String> {
145    let slice = lex.slice();
146    
147    let content = &slice[1..slice.len() - 1];
148    
149    let mut processed = String::new();
150    let mut chars = content.chars();
151    
152    while let Some(c) = chars.next() {
153        if c == '\\' {
154            if let Some(next) = chars.next() {
155                match next {
156                    'n' => processed.push('\n'),
157                    't' => processed.push('\t'),
158                    'r' => processed.push('\r'),
159                    '\\' => processed.push('\\'),
160                    '"' => processed.push('"'),
161                    _ => {
162                        // Invalid escape sequence
163                        processed.push('\\');
164                        processed.push(next);
165                    }
166                }
167            }
168        } else {
169            processed.push(c);
170        }
171    }
172    
173    Some(processed)
174}
175
/// A lexed token paired with the span it occupies in the source string.
pub struct TokenWithSpan {
    // The token value itself.
    pub token: Token,
    // Offset range (`start..end`) into the original source, as reported
    // by the logos lexer.
    pub span: Span,
}
180
impl TokenWithSpan {
    /// Converts this token's raw span offsets into a [`SourceLocation`]
    /// resolved against `source_code`.
    pub fn get_location(&self, source_code: &str) -> SourceLocation {
        SourceLocation::from_spans(source_code, self.span.start, self.span.end)
    }
}
186
187pub fn tokenize(source: &str) -> Vec<TokenWithSpan> {
188    let mut lexer = Token::lexer(source);
189    let mut tokens = Vec::new();
190    
191    while let Some(token) = lexer.next() {
192        if let Ok(token) = token {
193            tokens.push(TokenWithSpan {
194                token: token.clone(),
195                span: lexer.span(),
196            });
197        }
198    }
199    
200    tokens
201}