// oak_delphi/lexer/mod.rs

1use crate::{kind::DelphiSyntaxKind, language::DelphiLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
3
4type State<'a, S> = LexerState<'a, S, DelphiLanguage>;
5
6/// Lexer implementation for Delphi programming language
7#[derive(Clone)]
8pub struct DelphiLexer<'config> {
9    _config: &'config DelphiLanguage,
10}
11
12impl<'config> DelphiLexer<'config> {
13    pub fn new(config: &'config DelphiLanguage) -> Self {
14        Self { _config: config }
15    }
16
17    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
18        while state.not_at_end() {
19            let safe_point = state.get_position();
20
21            if self.skip_whitespace(state) {
22                continue;
23            }
24
25            if self.skip_comment(state) {
26                continue;
27            }
28
29            if self.lex_string_literal(state) {
30                continue;
31            }
32
33            if self.lex_number_literal(state) {
34                continue;
35            }
36
37            if self.lex_identifier_or_keyword(state) {
38                continue;
39            }
40
41            if self.lex_operators(state) {
42                continue;
43            }
44
45            if self.lex_single_char_tokens(state) {
46                continue;
47            }
48
49            // 如果没有匹配任何规则,添加错误 token 并前进
50            let start_pos = state.get_position();
51            if let Some(ch) = state.peek() {
52                state.advance(ch.len_utf8());
53                state.add_token(DelphiSyntaxKind::Error, start_pos, state.get_position());
54            }
55
56            state.advance_if_dead_lock(safe_point);
57        }
58
59        Ok(())
60    }
61
62    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
63        let start_pos = state.get_position();
64        let mut consumed = false;
65        while let Some(ch) = state.peek() {
66            if ch.is_whitespace() {
67                consumed = true;
68                state.advance(ch.len_utf8());
69            }
70            else {
71                break;
72            }
73        }
74        if consumed {
75            state.add_token(DelphiSyntaxKind::Whitespace, start_pos, state.get_position());
76            true
77        }
78        else {
79            false
80        }
81    }
82
83    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
84        let start = state.get_position();
85
86        // Line comment: // ... until newline
87        if state.consume_if_starts_with("//") {
88            while let Some(ch) = state.peek() {
89                if ch == '\n' || ch == '\r' {
90                    break;
91                }
92                state.advance(ch.len_utf8());
93            }
94            state.add_token(DelphiSyntaxKind::LineComment, start, state.get_position());
95            return true;
96        }
97
98        // Block comment: { ... }
99        if state.consume_if_starts_with("{") {
100            let mut depth = 1usize;
101            while let Some(ch) = state.peek() {
102                if ch == '{' {
103                    depth += 1;
104                }
105                else if ch == '}' {
106                    depth -= 1;
107                    if depth == 0 {
108                        state.advance(1);
109                        break;
110                    }
111                }
112                state.advance(ch.len_utf8());
113            }
114            state.add_token(DelphiSyntaxKind::BlockComment, start, state.get_position());
115            return true;
116        }
117
118        // Block comment: (* ... *)
119        if state.consume_if_starts_with("(*") {
120            while let Some(ch) = state.peek() {
121                if state.consume_if_starts_with("*)") {
122                    break;
123                }
124                state.advance(ch.len_utf8());
125            }
126            state.add_token(DelphiSyntaxKind::BlockComment, start, state.get_position());
127            return true;
128        }
129
130        false
131    }
132
133    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
134        let start = state.get_position();
135        if let Some('\'') = state.peek() {
136            state.advance(1);
137            while let Some(ch) = state.peek() {
138                if ch == '\'' {
139                    state.advance(1);
140                    if state.peek() == Some('\'') {
141                        // Double single quote is an escaped single quote
142                        state.advance(1);
143                        continue;
144                    }
145                    break;
146                }
147                state.advance(ch.len_utf8());
148            }
149            state.add_token(DelphiSyntaxKind::String, start, state.get_position());
150            return true;
151        }
152        false
153    }
154
155    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
156        let start = state.get_position();
157        let first = match state.peek() {
158            Some(c) => c,
159            None => return false,
160        };
161
162        if !first.is_ascii_digit() && first != '$' {
163            return false;
164        }
165
166        let mut is_float = false;
167
168        // Hexadecimal number
169        if first == '$' {
170            state.advance(1);
171            while let Some(c) = state.peek() {
172                if c.is_ascii_hexdigit() {
173                    state.advance(1);
174                }
175                else {
176                    break;
177                }
178            }
179        }
180        else {
181            // Decimal number
182            state.advance(1);
183            while let Some(c) = state.peek() {
184                if c.is_ascii_digit() {
185                    state.advance(1);
186                }
187                else {
188                    break;
189                }
190            }
191
192            // Fractional part
193            if state.peek() == Some('.') {
194                let next = state.peek_next_n(1);
195                if next.map(|c| c.is_ascii_digit()).unwrap_or(false) {
196                    is_float = true;
197                    state.advance(1); // consume '.'
198                    while let Some(c) = state.peek() {
199                        if c.is_ascii_digit() {
200                            state.advance(1);
201                        }
202                        else {
203                            break;
204                        }
205                    }
206                }
207            }
208
209            // Exponent part
210            if let Some(c) = state.peek() {
211                if c == 'e' || c == 'E' {
212                    let next = state.peek_next_n(1);
213                    if next == Some('+') || next == Some('-') || next.map(|d| d.is_ascii_digit()).unwrap_or(false) {
214                        is_float = true;
215                        state.advance(1);
216                        if let Some(sign) = state.peek() {
217                            if sign == '+' || sign == '-' {
218                                state.advance(1);
219                            }
220                        }
221                        while let Some(d) = state.peek() {
222                            if d.is_ascii_digit() {
223                                state.advance(1);
224                            }
225                            else {
226                                break;
227                            }
228                        }
229                    }
230                }
231            }
232        }
233
234        state.add_token(if is_float { DelphiSyntaxKind::Float } else { DelphiSyntaxKind::Number }, start, state.get_position());
235        true
236    }
237
238    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
239        let start = state.get_position();
240        let ch = match state.peek() {
241            Some(c) => c,
242            None => return false,
243        };
244
245        if !(ch.is_ascii_alphabetic() || ch == '_') {
246            return false;
247        }
248
249        state.advance(ch.len_utf8());
250        while let Some(c) = state.peek() {
251            if c.is_ascii_alphanumeric() || c == '_' {
252                state.advance(c.len_utf8());
253            }
254            else {
255                break;
256            }
257        }
258
259        let end = state.get_position();
260        let text = state.get_text_in((start..end).into());
261        let kind = match text.to_lowercase().as_str() {
262            "and" => DelphiSyntaxKind::And_,
263            "array" => DelphiSyntaxKind::Array,
264            "as" => DelphiSyntaxKind::As_,
265            "begin" => DelphiSyntaxKind::Begin,
266            "case" => DelphiSyntaxKind::Case,
267            "class" => DelphiSyntaxKind::Class,
268            "const" => DelphiSyntaxKind::Const,
269            "div" => DelphiSyntaxKind::Div,
270            "do" => DelphiSyntaxKind::Do,
271            "downto" => DelphiSyntaxKind::Downto,
272            "else" => DelphiSyntaxKind::Else,
273            "end" => DelphiSyntaxKind::End,
274            "except" => DelphiSyntaxKind::Except,
275            "false" => DelphiSyntaxKind::False_,
276            "finally" => DelphiSyntaxKind::Finally,
277            "for" => DelphiSyntaxKind::For,
278            "function" => DelphiSyntaxKind::Function,
279            "if" => DelphiSyntaxKind::If,
280            "implementation" => DelphiSyntaxKind::Implementation,
281            "in" => DelphiSyntaxKind::In_,
282            "interface" => DelphiSyntaxKind::Interface,
283            "is" => DelphiSyntaxKind::Is_,
284            "mod" => DelphiSyntaxKind::Mod,
285            "nil" => DelphiSyntaxKind::Nil,
286            "not" => DelphiSyntaxKind::Not_,
287            "object" => DelphiSyntaxKind::Object,
288            "of" => DelphiSyntaxKind::Of,
289            "or" => DelphiSyntaxKind::Or_,
290            "procedure" => DelphiSyntaxKind::Procedure,
291            "program" => DelphiSyntaxKind::Program,
292            "record" => DelphiSyntaxKind::Record,
293            "repeat" => DelphiSyntaxKind::Repeat,
294            "set" => DelphiSyntaxKind::Set,
295            "then" => DelphiSyntaxKind::Then,
296            "to" => DelphiSyntaxKind::To,
297            "true" => DelphiSyntaxKind::True_,
298            "try" => DelphiSyntaxKind::Try,
299            "type" => DelphiSyntaxKind::Type,
300            "unit" => DelphiSyntaxKind::Unit,
301            "until" => DelphiSyntaxKind::Until,
302            "uses" => DelphiSyntaxKind::Uses,
303            "var" => DelphiSyntaxKind::Var,
304            "while" => DelphiSyntaxKind::While,
305            "with" => DelphiSyntaxKind::With,
306            _ => DelphiSyntaxKind::Identifier,
307        };
308
309        state.add_token(kind, start, state.get_position());
310        true
311    }
312
313    fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
314        let start = state.get_position();
315
316        // Multi-character operators (longest first)
317        let patterns: &[(&str, DelphiSyntaxKind)] = &[(":=", DelphiSyntaxKind::Assign), ("<=", DelphiSyntaxKind::LessEqual), (">=", DelphiSyntaxKind::GreaterEqual), ("<>", DelphiSyntaxKind::NotEqual), ("..", DelphiSyntaxKind::DotDot)];
318
319        for (pat, kind) in patterns {
320            if state.consume_if_starts_with(pat) {
321                state.add_token(*kind, start, state.get_position());
322                return true;
323            }
324        }
325
326        // Single-character operators
327        if let Some(ch) = state.peek() {
328            let kind = match ch {
329                '+' => Some(DelphiSyntaxKind::Plus),
330                '-' => Some(DelphiSyntaxKind::Minus),
331                '*' => Some(DelphiSyntaxKind::Star),
332                '/' => Some(DelphiSyntaxKind::Slash),
333                '=' => Some(DelphiSyntaxKind::Equal),
334                '<' => Some(DelphiSyntaxKind::Less),
335                '>' => Some(DelphiSyntaxKind::Greater),
336                '.' => Some(DelphiSyntaxKind::Dot),
337                ':' => Some(DelphiSyntaxKind::Colon),
338                '^' => Some(DelphiSyntaxKind::Caret),
339                '@' => Some(DelphiSyntaxKind::At),
340                _ => None,
341            };
342
343            if let Some(k) = kind {
344                state.advance(ch.len_utf8());
345                state.add_token(k, start, state.get_position());
346                return true;
347            }
348        }
349
350        false
351    }
352
353    fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
354        let start = state.get_position();
355
356        if let Some(ch) = state.peek() {
357            let kind = match ch {
358                '(' => DelphiSyntaxKind::LeftParen,
359                ')' => DelphiSyntaxKind::RightParen,
360                '[' => DelphiSyntaxKind::LeftBracket,
361                ']' => DelphiSyntaxKind::RightBracket,
362                ',' => DelphiSyntaxKind::Comma,
363                ';' => DelphiSyntaxKind::Semicolon,
364                _ => return false,
365            };
366
367            state.advance(ch.len_utf8());
368            state.add_token(kind, start, state.get_position());
369            true
370        }
371        else {
372            false
373        }
374    }
375}
376
377impl<'config> Lexer<DelphiLanguage> for DelphiLexer<'config> {
378    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DelphiLanguage>) -> LexOutput<DelphiLanguage> {
379        let mut state = LexerState::new(source);
380        let result = self.run(&mut state);
381        if result.is_ok() {
382            state.add_eof();
383        }
384        state.finish_with_cache(result, cache)
385    }
386}