devalang_core/core/lexer/handler/
driver.rs

1use crate::core::lexer::{
2    handler::{
3        arrow::handle_arrow_lexer,
4        at::handle_at_lexer,
5        brace::{handle_lbrace_lexer, handle_rbrace_lexer},
6        colon::handle_colon_lexer,
7        comment::handle_comment_lexer,
8        dot::handle_dot_lexer,
9        identifier::handle_identifier_lexer,
10        indent::handle_indent_lexer,
11        newline::handle_newline_lexer,
12        number::handle_number_lexer,
13        operator::handle_operator_lexer,
14        parenthesis::{handle_lparen_lexer, handle_rparen_lexer},
15        slash::handle_slash_lexer,
16        string::handle_string_lexer,
17    },
18    token::{Token, TokenKind},
19};
20
/// Pulls the next character out of `chars`, discarding every carriage return
/// (`'\r'`) met along the way.
///
/// `column` is incremented by one for any returned character other than
/// `'\n'`; discarded `'\r'` characters never touch it. `_line` is accepted for
/// signature compatibility but is neither read nor written here.
///
/// Returns `None` when the iterator runs out (including when only `'\r'`
/// characters were left).
fn advance_char<I: Iterator<Item = char>>(
    chars: &mut std::iter::Peekable<I>,
    _line: &mut usize,
    column: &mut usize,
) -> Option<char> {
    // `find` consumes up to and including the first non-`'\r'` character.
    let next = chars.find(|&c| c != '\r')?;
    if next != '\n' {
        *column += 1;
    }
    Some(next)
}
38
39pub fn handle_content_lexing(content: String) -> Result<Vec<Token>, String> {
40    let mut tokens = Vec::new();
41
42    let mut line = 1;
43    let mut column = 1;
44
45    let mut indent_stack: Vec<usize> = vec![0];
46    let mut current_indent = 0;
47    let mut at_line_start = true;
48
49    let mut chars = content.chars().peekable();
50
51    while chars.peek().is_some() {
52        if at_line_start {
53            handle_indent_lexer(
54                &mut chars,
55                &mut current_indent,
56                &mut indent_stack,
57                &mut tokens,
58                &mut line,
59                &mut column,
60            );
61
62            at_line_start = false;
63        }
64
65        let Some(ch) = advance_char(&mut chars, &mut line, &mut column) else {
66            break;
67        };
68
69        match ch {
70            '\n' => {
71                handle_newline_lexer(
72                    ch,
73                    &mut chars,
74                    &mut tokens,
75                    &mut line,
76                    &mut column,
77                    &mut at_line_start,
78                    &mut current_indent,
79                );
80            }
81            ' ' | '\t' => {
82                // Already handled by indent_lexer
83            }
84            '#' => {
85                handle_comment_lexer(
86                    ch,
87                    &mut chars,
88                    &mut current_indent,
89                    &mut indent_stack,
90                    &mut tokens,
91                    &mut line,
92                    &mut column,
93                );
94            }
95            ':' => {
96                handle_colon_lexer(
97                    ch,
98                    &mut chars,
99                    &mut current_indent,
100                    &mut indent_stack,
101                    &mut tokens,
102                    &mut line,
103                    &mut column,
104                );
105            }
106            '=' | '!' | '<' | '>' | '+' | '*' => {
107                handle_operator_lexer(
108                    ch,
109                    &mut chars,
110                    &mut current_indent,
111                    &mut indent_stack,
112                    &mut tokens,
113                    &mut line,
114                    &mut column,
115                );
116            }
117            '/' => {
118                handle_slash_lexer(
119                    ch,
120                    &mut chars,
121                    &mut current_indent,
122                    &mut indent_stack,
123                    &mut tokens,
124                    &mut line,
125                    &mut column,
126                );
127            }
128            '-' => {
129                handle_arrow_lexer(
130                    ch,
131                    &mut chars,
132                    &mut current_indent,
133                    &mut indent_stack,
134                    &mut tokens,
135                    &mut line,
136                    &mut column,
137                );
138                // If not parsed as arrow or number, fallback as Minus token
139                if let Some(last) = tokens.last() {
140                    if last.kind == TokenKind::Unknown && last.lexeme == "-" {
141                        // replace last with Minus
142                        let _ = tokens.pop();
143                        tokens.push(Token {
144                            kind: TokenKind::Minus,
145                            lexeme: "-".to_string(),
146                            line,
147                            column,
148                            indent: current_indent,
149                        });
150                    }
151                }
152            }
153            '{' => {
154                handle_lbrace_lexer(
155                    ch,
156                    &mut chars,
157                    &mut current_indent,
158                    &mut indent_stack,
159                    &mut tokens,
160                    &mut line,
161                    &mut column,
162                );
163            }
164            '}' => {
165                handle_rbrace_lexer(
166                    ch,
167                    &mut chars,
168                    &mut current_indent,
169                    &mut indent_stack,
170                    &mut tokens,
171                    &mut line,
172                    &mut column,
173                );
174            }
175            '[' => {
176                tokens.push(Token {
177                    kind: TokenKind::LBracket,
178                    lexeme: "[".to_string(),
179                    line,
180                    column,
181                    indent: current_indent,
182                });
183            }
184            ']' => {
185                tokens.push(Token {
186                    kind: TokenKind::RBracket,
187                    lexeme: "]".to_string(),
188                    line,
189                    column,
190                    indent: current_indent,
191                });
192            }
193            ',' => {
194                tokens.push(Token {
195                    kind: TokenKind::Comma,
196                    lexeme: ",".to_string(),
197                    line,
198                    column,
199                    indent: current_indent,
200                });
201            }
202            '(' => {
203                handle_lparen_lexer(
204                    ch,
205                    &mut chars,
206                    &mut current_indent,
207                    &mut indent_stack,
208                    &mut tokens,
209                    &mut line,
210                    &mut column,
211                );
212            }
213            ')' => {
214                handle_rparen_lexer(
215                    ch,
216                    &mut chars,
217                    &mut current_indent,
218                    &mut indent_stack,
219                    &mut tokens,
220                    &mut line,
221                    &mut column,
222                );
223            }
224            '.' => {
225                handle_dot_lexer(
226                    ch,
227                    &mut chars,
228                    &mut current_indent,
229                    &mut indent_stack,
230                    &mut tokens,
231                    &mut line,
232                    &mut column,
233                );
234            }
235            '@' => {
236                handle_at_lexer(
237                    ch,
238                    &mut chars,
239                    &mut current_indent,
240                    &mut indent_stack,
241                    &mut tokens,
242                    &mut line,
243                    &mut column,
244                );
245            }
246            '$' => {
247                // Treat `$` as start of a special identifier like `$env` or `$math`
248                let mut ident = String::from("$");
249                while let Some(&c) = chars.peek() {
250                    if c.is_ascii_alphanumeric() || c == '_' {
251                        ident.push(c);
252                        chars.next();
253                        column += 1;
254                    } else {
255                        break;
256                    }
257                }
258                tokens.push(Token {
259                    kind: TokenKind::Identifier,
260                    lexeme: ident,
261                    line,
262                    column,
263                    indent: current_indent,
264                });
265            }
266            '0'..='9' => {
267                handle_number_lexer(
268                    ch,
269                    &mut chars,
270                    &mut current_indent,
271                    &mut indent_stack,
272                    &mut tokens,
273                    &mut line,
274                    &mut column,
275                );
276            }
277            'a'..='z' | 'A'..='Z' | '_' => {
278                handle_identifier_lexer(
279                    ch,
280                    &mut chars,
281                    &mut current_indent,
282                    &mut indent_stack,
283                    &mut tokens,
284                    &mut line,
285                    &mut column,
286                );
287            }
288            '"' | '\'' => {
289                handle_string_lexer(
290                    ch,
291                    &mut chars,
292                    &mut current_indent,
293                    &mut indent_stack,
294                    &mut tokens,
295                    &mut line,
296                    &mut column,
297                );
298            }
299            _ => {
300                // Ignore unknown char
301            }
302        }
303    }
304
305    while indent_stack.len() > 1 {
306        indent_stack.pop();
307        current_indent = *indent_stack.last().unwrap();
308        tokens.push(Token {
309            kind: TokenKind::Dedent,
310            lexeme: String::new(),
311            line,
312            column,
313            indent: current_indent,
314        });
315    }
316
317    tokens.push(Token {
318        kind: TokenKind::EOF,
319        lexeme: String::new(),
320        line: line + 1,
321        column: 0,
322        indent: 0,
323    });
324
325    // NOTE: Debug only
326    // for token in &tokens {
327    //     println!(
328    //         "{:?} @ line {}, col {}, indent {}",
329    //         token.kind,
330    //         token.line,
331    //         token.column,
332    //         token.indent
333    //     );
334    // }
335
336    Ok(tokens)
337}