Skip to main content

oak_delphi/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::DelphiLanguage, lexer::token_type::DelphiTokenType};
5use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
6
/// Shorthand for the framework's `LexerState` specialised to `DelphiLanguage`;
/// every lexing method in this module takes `&mut State<'a, S>`.
type State<'a, S> = LexerState<'a, S, DelphiLanguage>;
8
/// Lexer implementation for Delphi programming language
///
/// Holds a borrowed [`DelphiLanguage`] configuration for the lifetime `'config`.
#[derive(Clone, Debug)]
pub struct DelphiLexer<'config> {
    /// Language configuration handed to `DelphiLexer::new`. It is stored but
    /// not consulted by the tokenisation methods of this type; the leading
    /// underscore keeps the unused-field lint quiet.
    _config: &'config DelphiLanguage,
}
14
15impl<'config> DelphiLexer<'config> {
16    pub fn new(config: &'config DelphiLanguage) -> Self {
17        Self { _config: config }
18    }
19
20    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
21        while state.not_at_end() {
22            let safe_point = state.get_position();
23
24            if self.skip_whitespace(state) {
25                continue;
26            }
27
28            if self.skip_comment(state) {
29                continue;
30            }
31
32            if self.lex_string_literal(state) {
33                continue;
34            }
35
36            if self.lex_number_literal(state) {
37                continue;
38            }
39
40            if self.lex_identifier_or_keyword(state) {
41                continue;
42            }
43
44            if self.lex_operators(state) {
45                continue;
46            }
47
48            if self.lex_single_char_tokens(state) {
49                continue;
50            }
51
52            // 如果没有匹配任何规则,添加错误 token 并前进
53            let start_pos = state.get_position();
54            if let Some(ch) = state.peek() {
55                state.advance(ch.len_utf8());
56                state.add_token(DelphiTokenType::Error, start_pos, state.get_position())
57            }
58
59            state.advance_if_dead_lock(safe_point)
60        }
61
62        Ok(())
63    }
64
65    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
66        let start_pos = state.get_position();
67        let mut consumed = false;
68        while let Some(ch) = state.peek() {
69            if ch.is_whitespace() {
70                consumed = true;
71                state.advance(ch.len_utf8())
72            }
73            else {
74                break;
75            }
76        }
77        if consumed {
78            state.add_token(DelphiTokenType::Whitespace, start_pos, state.get_position());
79            true
80        }
81        else {
82            false
83        }
84    }
85
86    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
87        let start = state.get_position();
88
89        // Line comment: // ... until newline
90        if state.consume_if_starts_with("//") {
91            while let Some(ch) = state.peek() {
92                if ch == '\n' || ch == '\r' {
93                    break;
94                }
95                state.advance(ch.len_utf8())
96            }
97            state.add_token(DelphiTokenType::LineComment, start, state.get_position());
98            return true;
99        }
100
101        // Block comment: { ... }
102        if state.consume_if_starts_with("{") {
103            let mut depth = 1usize;
104            while let Some(ch) = state.peek() {
105                if ch == '{' {
106                    depth += 1
107                }
108                else if ch == '}' {
109                    depth -= 1;
110                    if depth == 0 {
111                        state.advance(1);
112                        break;
113                    }
114                }
115                state.advance(ch.len_utf8())
116            }
117            state.add_token(DelphiTokenType::BlockComment, start, state.get_position());
118            return true;
119        }
120
121        // Block comment: (* ... *)
122        if state.consume_if_starts_with("(*") {
123            while let Some(ch) = state.peek() {
124                if state.consume_if_starts_with("*)") {
125                    break;
126                }
127                state.advance(ch.len_utf8())
128            }
129            state.add_token(DelphiTokenType::BlockComment, start, state.get_position());
130            return true;
131        }
132
133        false
134    }
135
136    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
137        let start = state.get_position();
138        if let Some('\'') = state.peek() {
139            state.advance(1);
140            while let Some(ch) = state.peek() {
141                if ch == '\'' {
142                    state.advance(1);
143                    if state.peek() == Some('\'') {
144                        // Double single quote is an escaped single quote
145                        state.advance(1);
146                        continue;
147                    }
148                    break;
149                }
150                state.advance(ch.len_utf8())
151            }
152            state.add_token(DelphiTokenType::String, start, state.get_position());
153            return true;
154        }
155        false
156    }
157
158    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
159        let start = state.get_position();
160        let first = match state.peek() {
161            Some(c) => c,
162            None => return false,
163        };
164
165        if !first.is_ascii_digit() && first != '$' {
166            return false;
167        }
168
169        let mut is_float = false;
170
171        // Hexadecimal number
172        if first == '$' {
173            state.advance(1);
174            while let Some(c) = state.peek() {
175                if c.is_ascii_hexdigit() { state.advance(1) } else { break }
176            }
177        }
178        else {
179            // Decimal number
180            state.advance(1);
181            while let Some(c) = state.peek() {
182                if c.is_ascii_digit() { state.advance(1) } else { break }
183            }
184
185            // Fractional part
186            if state.peek() == Some('.') {
187                let next = state.peek_next_n(1);
188                if next.map(|c| c.is_ascii_digit()).unwrap_or(false) {
189                    is_float = true;
190                    state.advance(1); // consume '.'
191                    while let Some(c) = state.peek() {
192                        if c.is_ascii_digit() { state.advance(1) } else { break }
193                    }
194                }
195            }
196
197            // Exponent part
198            if let Some(c) = state.peek() {
199                if c == 'e' || c == 'E' {
200                    let next = state.peek_next_n(1);
201                    if next == Some('+') || next == Some('-') || next.map(|d| d.is_ascii_digit()).unwrap_or(false) {
202                        is_float = true;
203                        state.advance(1);
204                        if let Some(sign) = state.peek() {
205                            if sign == '+' || sign == '-' {
206                                state.advance(1)
207                            }
208                        }
209                        while let Some(d) = state.peek() {
210                            if d.is_ascii_digit() { state.advance(1) } else { break }
211                        }
212                    }
213                }
214            }
215        }
216
217        state.add_token(if is_float { DelphiTokenType::Float } else { DelphiTokenType::Number }, start, state.get_position());
218        true
219    }
220
221    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
222        let start = state.get_position();
223        let ch = match state.peek() {
224            Some(c) => c,
225            None => return false,
226        };
227
228        if !(ch.is_ascii_alphabetic() || ch == '_') {
229            return false;
230        }
231
232        state.advance(ch.len_utf8());
233        while let Some(c) = state.peek() {
234            if c.is_ascii_alphanumeric() || c == '_' { state.advance(c.len_utf8()) } else { break }
235        }
236
237        let end = state.get_position();
238        let text = state.get_text_in((start..end).into());
239        let kind = match text.to_lowercase().as_str() {
240            "and" => DelphiTokenType::And_,
241            "array" => DelphiTokenType::Array,
242            "as" => DelphiTokenType::As_,
243            "begin" => DelphiTokenType::Begin,
244            "case" => DelphiTokenType::Case,
245            "class" => DelphiTokenType::Class,
246            "const" => DelphiTokenType::Const,
247            "div" => DelphiTokenType::Div,
248            "do" => DelphiTokenType::Do,
249            "downto" => DelphiTokenType::Downto,
250            "else" => DelphiTokenType::Else,
251            "end" => DelphiTokenType::End,
252            "except" => DelphiTokenType::Except,
253            "false" => DelphiTokenType::False_,
254            "finally" => DelphiTokenType::Finally,
255            "for" => DelphiTokenType::For,
256            "function" => DelphiTokenType::Function,
257            "if" => DelphiTokenType::If,
258            "implementation" => DelphiTokenType::Implementation,
259            "in" => DelphiTokenType::In_,
260            "interface" => DelphiTokenType::Interface,
261            "is" => DelphiTokenType::Is_,
262            "mod" => DelphiTokenType::Mod,
263            "nil" => DelphiTokenType::Nil,
264            "not" => DelphiTokenType::Not_,
265            "object" => DelphiTokenType::Object,
266            "of" => DelphiTokenType::Of,
267            "or" => DelphiTokenType::Or_,
268            "procedure" => DelphiTokenType::Procedure,
269            "program" => DelphiTokenType::Program,
270            "record" => DelphiTokenType::Record,
271            "repeat" => DelphiTokenType::Repeat,
272            "set" => DelphiTokenType::Set,
273            "then" => DelphiTokenType::Then,
274            "to" => DelphiTokenType::To,
275            "true" => DelphiTokenType::True_,
276            "try" => DelphiTokenType::Try,
277            "type" => DelphiTokenType::Type,
278            "unit" => DelphiTokenType::Unit,
279            "until" => DelphiTokenType::Until,
280            "uses" => DelphiTokenType::Uses,
281            "var" => DelphiTokenType::Var,
282            "while" => DelphiTokenType::While,
283            "with" => DelphiTokenType::With,
284            _ => DelphiTokenType::Identifier,
285        };
286
287        state.add_token(kind, start, state.get_position());
288        true
289    }
290
291    fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
292        let start = state.get_position();
293
294        // Multi-character operators (longest first)
295        let patterns: &[(&str, DelphiTokenType)] = &[(":=", DelphiTokenType::Assign), ("<=", DelphiTokenType::LessEqual), (">=", DelphiTokenType::GreaterEqual), ("<>", DelphiTokenType::NotEqual), ("..", DelphiTokenType::DotDot)];
296
297        for (pat, kind) in patterns {
298            if state.consume_if_starts_with(pat) {
299                state.add_token(*kind, start, state.get_position());
300                return true;
301            }
302        }
303
304        // Single-character operators
305        if let Some(ch) = state.peek() {
306            let kind = match ch {
307                '+' => Some(DelphiTokenType::Plus),
308                '-' => Some(DelphiTokenType::Minus),
309                '*' => Some(DelphiTokenType::Star),
310                '/' => Some(DelphiTokenType::Slash),
311                '=' => Some(DelphiTokenType::Equal),
312                '<' => Some(DelphiTokenType::Less),
313                '>' => Some(DelphiTokenType::Greater),
314                '.' => Some(DelphiTokenType::Dot),
315                ':' => Some(DelphiTokenType::Colon),
316                '^' => Some(DelphiTokenType::Caret),
317                '@' => Some(DelphiTokenType::At),
318                _ => None,
319            };
320
321            if let Some(k) = kind {
322                state.advance(ch.len_utf8());
323                state.add_token(k, start, state.get_position());
324                return true;
325            }
326        }
327
328        false
329    }
330
331    fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
332        let start = state.get_position();
333
334        if let Some(ch) = state.peek() {
335            let kind = match ch {
336                '(' => DelphiTokenType::LeftParen,
337                ')' => DelphiTokenType::RightParen,
338                '[' => DelphiTokenType::LeftBracket,
339                ']' => DelphiTokenType::RightBracket,
340                ',' => DelphiTokenType::Comma,
341                ';' => DelphiTokenType::Semicolon,
342                _ => return false,
343            };
344
345            state.advance(ch.len_utf8());
346            state.add_token(kind, start, state.get_position());
347            true
348        }
349        else {
350            false
351        }
352    }
353}
354
355impl<'config> Lexer<DelphiLanguage> for DelphiLexer<'config> {
356    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DelphiLanguage>) -> LexOutput<DelphiLanguage> {
357        let mut state = State::new_with_cache(source, 0, cache);
358        let result = self.run(&mut state);
359        if result.is_ok() {
360            state.add_eof()
361        }
362        state.finish_with_cache(result, cache)
363    }
364}