1use crate::{kind::DelphiSyntaxKind, language::DelphiLanguage};
2use oak_core::{
3 IncrementalCache, Lexer, LexerState, OakError,
4 lexer::{CommentLine, LexOutput, StringConfig, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
/// Shorthand for the lexer state specialised to the Delphi language.
type State<S> = LexerState<S, DelphiLanguage>;

/// Whitespace scanner configuration with `unicode_whitespace` enabled; used by `skip_whitespace`.
static DELPHI_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
/// Line-comment configuration with `//` as the only marker.
// NOTE(review): not referenced by the code visible in this file (`skip_comment` scans comments by hand) — confirm whether it is still needed.
static DELPHI_COMMENT: LazyLock<CommentLine> = LazyLock::new(|| CommentLine { line_markers: &["//"] });
/// String configuration with `'` as the only quote character and no escape character.
// NOTE(review): not referenced by the code visible in this file (`lex_string_literal` scans strings by hand) — confirm whether it is still needed.
static DELPHI_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: None });
14
/// Lexer that turns Delphi source text into `DelphiSyntaxKind` tokens.
///
/// The lexer only borrows its language configuration, so it cannot outlive
/// the `'config` lifetime.
#[derive(Clone)]
pub struct DelphiLexer<'config> {
    /// Language configuration supplied to `new`.
    // NOTE(review): none of the scanning methods visible in this file read this field.
    config: &'config DelphiLanguage,
}
20
21impl<'config> Lexer<DelphiLanguage> for DelphiLexer<'config> {
22 fn lex_incremental(
23 &self,
24 source: impl Source,
25 changed: usize,
26 cache: IncrementalCache<DelphiLanguage>,
27 ) -> LexOutput<DelphiLanguage> {
28 let mut state = LexerState::new_with_cache(source, changed, cache);
29 let result = self.run(&mut state);
30 state.finish(result)
31 }
32}
33
34impl<'config> DelphiLexer<'config> {
35 pub fn new(config: &'config DelphiLanguage) -> Self {
36 Self { config }
37 }
38
39 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
40 while state.not_at_end() {
41 let safe_point = state.get_position();
42
43 if self.skip_whitespace(state) {
44 continue;
45 }
46
47 if self.skip_comment(state) {
48 continue;
49 }
50
51 if self.lex_string_literal(state) {
52 continue;
53 }
54
55 if self.lex_number_literal(state) {
56 continue;
57 }
58
59 if self.lex_identifier_or_keyword(state) {
60 continue;
61 }
62
63 if self.lex_operators(state) {
64 continue;
65 }
66
67 if self.lex_single_char_tokens(state) {
68 continue;
69 }
70
71 state.safe_check(safe_point);
72 }
73
74 let eof_pos = state.get_position();
76 state.add_token(DelphiSyntaxKind::Eof, eof_pos, eof_pos);
77 Ok(())
78 }
79
80 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
81 match DELPHI_WHITESPACE.scan(state.rest(), state.get_position(), DelphiSyntaxKind::Whitespace) {
82 Some(token) => {
83 state.advance_with(token);
84 true
85 }
86 None => false,
87 }
88 }
89
90 fn skip_comment<S: Source>(&self, state: &mut State<S>) -> bool {
91 let start = state.get_position();
92 let rest = state.rest();
93
94 if rest.starts_with("//") {
96 state.advance(2);
97 while let Some(ch) = state.peek() {
98 if ch == '\n' || ch == '\r' {
99 break;
100 }
101 state.advance(ch.len_utf8());
102 }
103 state.add_token(DelphiSyntaxKind::Comment, start, state.get_position());
104 return true;
105 }
106
107 if rest.starts_with("{") {
109 state.advance(1);
110 while let Some(ch) = state.peek() {
111 if ch == '}' {
112 state.advance(1);
113 break;
114 }
115 state.advance(ch.len_utf8());
116 }
117 state.add_token(DelphiSyntaxKind::Comment, start, state.get_position());
118 return true;
119 }
120
121 if rest.starts_with("(*") {
122 state.advance(2);
123 while let Some(ch) = state.peek() {
124 if ch == '*' && state.peek_next_n(1) == Some(')') {
125 state.advance(2);
126 break;
127 }
128 state.advance(ch.len_utf8());
129 }
130 state.add_token(DelphiSyntaxKind::Comment, start, state.get_position());
131 return true;
132 }
133
134 false
135 }
136
137 fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
138 let start = state.get_position();
139
140 if state.current() != Some('\'') {
141 return false;
142 }
143
144 state.advance(1); while let Some(ch) = state.peek() {
146 if ch == '\'' {
147 if state.peek_next_n(1) == Some('\'') {
149 state.advance(2); continue;
151 }
152 else {
153 state.advance(1); break;
155 }
156 }
157 if ch == '\n' || ch == '\r' {
158 break; }
160 state.advance(ch.len_utf8());
161 }
162
163 state.add_token(DelphiSyntaxKind::String, start, state.get_position());
164 true
165 }
166
167 fn lex_number_literal<S: Source>(&self, state: &mut State<S>) -> bool {
168 let start = state.get_position();
169 let first = match state.current() {
170 Some(c) => c,
171 None => return false,
172 };
173
174 if !first.is_ascii_digit() && first != '$' {
175 return false;
176 }
177
178 let mut is_float = false;
179
180 if first == '$' {
182 state.advance(1);
183 while let Some(c) = state.peek() {
184 if c.is_ascii_hexdigit() {
185 state.advance(1);
186 }
187 else {
188 break;
189 }
190 }
191 }
192 else {
193 state.advance(1);
195 while let Some(c) = state.peek() {
196 if c.is_ascii_digit() {
197 state.advance(1);
198 }
199 else {
200 break;
201 }
202 }
203
204 if state.peek() == Some('.') {
206 let next = state.peek_next_n(1);
207 if next.map(|c| c.is_ascii_digit()).unwrap_or(false) {
208 is_float = true;
209 state.advance(1); while let Some(c) = state.peek() {
211 if c.is_ascii_digit() {
212 state.advance(1);
213 }
214 else {
215 break;
216 }
217 }
218 }
219 }
220
221 if let Some(c) = state.peek() {
223 if c == 'e' || c == 'E' {
224 let next = state.peek_next_n(1);
225 if next == Some('+') || next == Some('-') || next.map(|d| d.is_ascii_digit()).unwrap_or(false) {
226 is_float = true;
227 state.advance(1);
228 if let Some(sign) = state.peek() {
229 if sign == '+' || sign == '-' {
230 state.advance(1);
231 }
232 }
233 while let Some(d) = state.peek() {
234 if d.is_ascii_digit() {
235 state.advance(1);
236 }
237 else {
238 break;
239 }
240 }
241 }
242 }
243 }
244 }
245
246 state.add_token(DelphiSyntaxKind::Number, start, state.get_position());
247 true
248 }
249
250 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
251 let start = state.get_position();
252 let ch = match state.current() {
253 Some(c) => c,
254 None => return false,
255 };
256
257 if !(ch.is_ascii_alphabetic() || ch == '_') {
258 return false;
259 }
260
261 state.advance(1);
262 while let Some(c) = state.current() {
263 if c.is_ascii_alphanumeric() || c == '_' {
264 state.advance(1);
265 }
266 else {
267 break;
268 }
269 }
270
271 let end = state.get_position();
272 let text = state.get_text_in((start..end).into());
273 let kind = match text.to_lowercase().as_str() {
274 "and" => DelphiSyntaxKind::And_,
275 "array" => DelphiSyntaxKind::Array,
276 "as" => DelphiSyntaxKind::As_,
277 "begin" => DelphiSyntaxKind::Begin,
278 "case" => DelphiSyntaxKind::Case,
279 "class" => DelphiSyntaxKind::Class,
280 "const" => DelphiSyntaxKind::Const,
281 "div" => DelphiSyntaxKind::Div,
282 "do" => DelphiSyntaxKind::Do,
283 "downto" => DelphiSyntaxKind::Downto,
284 "else" => DelphiSyntaxKind::Else,
285 "end" => DelphiSyntaxKind::End,
286 "except" => DelphiSyntaxKind::Except,
287 "false" => DelphiSyntaxKind::False_,
288 "finally" => DelphiSyntaxKind::Finally,
289 "for" => DelphiSyntaxKind::For,
290 "function" => DelphiSyntaxKind::Function,
291 "if" => DelphiSyntaxKind::If,
292 "implementation" => DelphiSyntaxKind::Implementation,
293 "in" => DelphiSyntaxKind::In_,
294 "interface" => DelphiSyntaxKind::Interface,
295 "is" => DelphiSyntaxKind::Is_,
296 "mod" => DelphiSyntaxKind::Mod,
297 "nil" => DelphiSyntaxKind::Nil,
298 "not" => DelphiSyntaxKind::Not_,
299 "object" => DelphiSyntaxKind::Object,
300 "of" => DelphiSyntaxKind::Of,
301 "or" => DelphiSyntaxKind::Or_,
302 "procedure" => DelphiSyntaxKind::Procedure,
303 "program" => DelphiSyntaxKind::Program,
304 "record" => DelphiSyntaxKind::Record,
305 "repeat" => DelphiSyntaxKind::Repeat,
306 "set" => DelphiSyntaxKind::Set,
307 "then" => DelphiSyntaxKind::Then,
308 "to" => DelphiSyntaxKind::To,
309 "true" => DelphiSyntaxKind::True_,
310 "try" => DelphiSyntaxKind::Try,
311 "type" => DelphiSyntaxKind::Type,
312 "unit" => DelphiSyntaxKind::Unit,
313 "until" => DelphiSyntaxKind::Until,
314 "uses" => DelphiSyntaxKind::Uses,
315 "var" => DelphiSyntaxKind::Var,
316 "while" => DelphiSyntaxKind::While,
317 "with" => DelphiSyntaxKind::With,
318 _ => DelphiSyntaxKind::Identifier,
319 };
320
321 state.add_token(kind, start, state.get_position());
322 true
323 }
324
325 fn lex_operators<S: Source>(&self, state: &mut State<S>) -> bool {
326 let start = state.get_position();
327 let rest = state.rest();
328
329 let patterns: &[(&str, DelphiSyntaxKind)] = &[
331 (":=", DelphiSyntaxKind::Assign),
332 ("<=", DelphiSyntaxKind::LessEqual),
333 (">=", DelphiSyntaxKind::GreaterEqual),
334 ("<>", DelphiSyntaxKind::NotEqual),
335 ("..", DelphiSyntaxKind::DotDot),
336 ];
337
338 for (pat, kind) in patterns {
339 if rest.starts_with(pat) {
340 state.advance(pat.len());
341 state.add_token(*kind, start, state.get_position());
342 return true;
343 }
344 }
345
346 if let Some(ch) = state.current() {
348 let kind = match ch {
349 '+' => Some(DelphiSyntaxKind::Plus),
350 '-' => Some(DelphiSyntaxKind::Minus),
351 '*' => Some(DelphiSyntaxKind::Star),
352 '/' => Some(DelphiSyntaxKind::Slash),
353 '=' => Some(DelphiSyntaxKind::Equal),
354 '<' => Some(DelphiSyntaxKind::Less),
355 '>' => Some(DelphiSyntaxKind::Greater),
356 '.' => Some(DelphiSyntaxKind::Dot),
357 ':' => Some(DelphiSyntaxKind::Colon),
358 '^' => Some(DelphiSyntaxKind::Caret),
359 '@' => Some(DelphiSyntaxKind::At),
360 _ => None,
361 };
362
363 if let Some(k) = kind {
364 state.advance(ch.len_utf8());
365 state.add_token(k, start, state.get_position());
366 return true;
367 }
368 }
369
370 false
371 }
372
373 fn lex_single_char_tokens<S: Source>(&self, state: &mut State<S>) -> bool {
374 let start = state.get_position();
375
376 if let Some(ch) = state.current() {
377 let kind = match ch {
378 '(' => DelphiSyntaxKind::LeftParen,
379 ')' => DelphiSyntaxKind::RightParen,
380 '[' => DelphiSyntaxKind::LeftBracket,
381 ']' => DelphiSyntaxKind::RightBracket,
382 ',' => DelphiSyntaxKind::Comma,
383 ';' => DelphiSyntaxKind::Semicolon,
384 _ => return false,
385 };
386
387 state.advance(ch.len_utf8());
388 state.add_token(kind, start, state.get_position());
389 true
390 }
391 else {
392 false
393 }
394 }
395}