1use crate::{kind::DelphiSyntaxKind, language::DelphiLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
3
4type State<'a, S> = LexerState<'a, S, DelphiLanguage>;
5
/// Lexer that turns Delphi source text into a stream of `DelphiSyntaxKind` tokens.
///
/// The lexer itself is stateless: it only borrows the language configuration
/// for the `'config` lifetime, which is why it can derive `Clone` cheaply.
#[derive(Clone)]
pub struct DelphiLexer<'config> {
    /// Language configuration supplied to [`DelphiLexer::new`].
    /// It is stored but not read by any method of this type.
    _config: &'config DelphiLanguage,
}
11
12impl<'config> DelphiLexer<'config> {
13 pub fn new(config: &'config DelphiLanguage) -> Self {
14 Self { _config: config }
15 }
16
17 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
18 while state.not_at_end() {
19 let safe_point = state.get_position();
20
21 if self.skip_whitespace(state) {
22 continue;
23 }
24
25 if self.skip_comment(state) {
26 continue;
27 }
28
29 if self.lex_string_literal(state) {
30 continue;
31 }
32
33 if self.lex_number_literal(state) {
34 continue;
35 }
36
37 if self.lex_identifier_or_keyword(state) {
38 continue;
39 }
40
41 if self.lex_operators(state) {
42 continue;
43 }
44
45 if self.lex_single_char_tokens(state) {
46 continue;
47 }
48
49 let start_pos = state.get_position();
51 if let Some(ch) = state.peek() {
52 state.advance(ch.len_utf8());
53 state.add_token(DelphiSyntaxKind::Error, start_pos, state.get_position());
54 }
55
56 state.advance_if_dead_lock(safe_point);
57 }
58
59 Ok(())
60 }
61
62 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
63 let start_pos = state.get_position();
64 let mut consumed = false;
65 while let Some(ch) = state.peek() {
66 if ch.is_whitespace() {
67 consumed = true;
68 state.advance(ch.len_utf8());
69 }
70 else {
71 break;
72 }
73 }
74 if consumed {
75 state.add_token(DelphiSyntaxKind::Whitespace, start_pos, state.get_position());
76 true
77 }
78 else {
79 false
80 }
81 }
82
83 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
84 let start = state.get_position();
85
86 if state.consume_if_starts_with("//") {
88 while let Some(ch) = state.peek() {
89 if ch == '\n' || ch == '\r' {
90 break;
91 }
92 state.advance(ch.len_utf8());
93 }
94 state.add_token(DelphiSyntaxKind::LineComment, start, state.get_position());
95 return true;
96 }
97
98 if state.consume_if_starts_with("{") {
100 let mut depth = 1usize;
101 while let Some(ch) = state.peek() {
102 if ch == '{' {
103 depth += 1;
104 }
105 else if ch == '}' {
106 depth -= 1;
107 if depth == 0 {
108 state.advance(1);
109 break;
110 }
111 }
112 state.advance(ch.len_utf8());
113 }
114 state.add_token(DelphiSyntaxKind::BlockComment, start, state.get_position());
115 return true;
116 }
117
118 if state.consume_if_starts_with("(*") {
120 while let Some(ch) = state.peek() {
121 if state.consume_if_starts_with("*)") {
122 break;
123 }
124 state.advance(ch.len_utf8());
125 }
126 state.add_token(DelphiSyntaxKind::BlockComment, start, state.get_position());
127 return true;
128 }
129
130 false
131 }
132
133 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
134 let start = state.get_position();
135 if let Some('\'') = state.peek() {
136 state.advance(1);
137 while let Some(ch) = state.peek() {
138 if ch == '\'' {
139 state.advance(1);
140 if state.peek() == Some('\'') {
141 state.advance(1);
143 continue;
144 }
145 break;
146 }
147 state.advance(ch.len_utf8());
148 }
149 state.add_token(DelphiSyntaxKind::String, start, state.get_position());
150 return true;
151 }
152 false
153 }
154
155 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
156 let start = state.get_position();
157 let first = match state.peek() {
158 Some(c) => c,
159 None => return false,
160 };
161
162 if !first.is_ascii_digit() && first != '$' {
163 return false;
164 }
165
166 let mut is_float = false;
167
168 if first == '$' {
170 state.advance(1);
171 while let Some(c) = state.peek() {
172 if c.is_ascii_hexdigit() {
173 state.advance(1);
174 }
175 else {
176 break;
177 }
178 }
179 }
180 else {
181 state.advance(1);
183 while let Some(c) = state.peek() {
184 if c.is_ascii_digit() {
185 state.advance(1);
186 }
187 else {
188 break;
189 }
190 }
191
192 if state.peek() == Some('.') {
194 let next = state.peek_next_n(1);
195 if next.map(|c| c.is_ascii_digit()).unwrap_or(false) {
196 is_float = true;
197 state.advance(1); while let Some(c) = state.peek() {
199 if c.is_ascii_digit() {
200 state.advance(1);
201 }
202 else {
203 break;
204 }
205 }
206 }
207 }
208
209 if let Some(c) = state.peek() {
211 if c == 'e' || c == 'E' {
212 let next = state.peek_next_n(1);
213 if next == Some('+') || next == Some('-') || next.map(|d| d.is_ascii_digit()).unwrap_or(false) {
214 is_float = true;
215 state.advance(1);
216 if let Some(sign) = state.peek() {
217 if sign == '+' || sign == '-' {
218 state.advance(1);
219 }
220 }
221 while let Some(d) = state.peek() {
222 if d.is_ascii_digit() {
223 state.advance(1);
224 }
225 else {
226 break;
227 }
228 }
229 }
230 }
231 }
232 }
233
234 state.add_token(if is_float { DelphiSyntaxKind::Float } else { DelphiSyntaxKind::Number }, start, state.get_position());
235 true
236 }
237
238 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
239 let start = state.get_position();
240 let ch = match state.peek() {
241 Some(c) => c,
242 None => return false,
243 };
244
245 if !(ch.is_ascii_alphabetic() || ch == '_') {
246 return false;
247 }
248
249 state.advance(ch.len_utf8());
250 while let Some(c) = state.peek() {
251 if c.is_ascii_alphanumeric() || c == '_' {
252 state.advance(c.len_utf8());
253 }
254 else {
255 break;
256 }
257 }
258
259 let end = state.get_position();
260 let text = state.get_text_in((start..end).into());
261 let kind = match text.to_lowercase().as_str() {
262 "and" => DelphiSyntaxKind::And_,
263 "array" => DelphiSyntaxKind::Array,
264 "as" => DelphiSyntaxKind::As_,
265 "begin" => DelphiSyntaxKind::Begin,
266 "case" => DelphiSyntaxKind::Case,
267 "class" => DelphiSyntaxKind::Class,
268 "const" => DelphiSyntaxKind::Const,
269 "div" => DelphiSyntaxKind::Div,
270 "do" => DelphiSyntaxKind::Do,
271 "downto" => DelphiSyntaxKind::Downto,
272 "else" => DelphiSyntaxKind::Else,
273 "end" => DelphiSyntaxKind::End,
274 "except" => DelphiSyntaxKind::Except,
275 "false" => DelphiSyntaxKind::False_,
276 "finally" => DelphiSyntaxKind::Finally,
277 "for" => DelphiSyntaxKind::For,
278 "function" => DelphiSyntaxKind::Function,
279 "if" => DelphiSyntaxKind::If,
280 "implementation" => DelphiSyntaxKind::Implementation,
281 "in" => DelphiSyntaxKind::In_,
282 "interface" => DelphiSyntaxKind::Interface,
283 "is" => DelphiSyntaxKind::Is_,
284 "mod" => DelphiSyntaxKind::Mod,
285 "nil" => DelphiSyntaxKind::Nil,
286 "not" => DelphiSyntaxKind::Not_,
287 "object" => DelphiSyntaxKind::Object,
288 "of" => DelphiSyntaxKind::Of,
289 "or" => DelphiSyntaxKind::Or_,
290 "procedure" => DelphiSyntaxKind::Procedure,
291 "program" => DelphiSyntaxKind::Program,
292 "record" => DelphiSyntaxKind::Record,
293 "repeat" => DelphiSyntaxKind::Repeat,
294 "set" => DelphiSyntaxKind::Set,
295 "then" => DelphiSyntaxKind::Then,
296 "to" => DelphiSyntaxKind::To,
297 "true" => DelphiSyntaxKind::True_,
298 "try" => DelphiSyntaxKind::Try,
299 "type" => DelphiSyntaxKind::Type,
300 "unit" => DelphiSyntaxKind::Unit,
301 "until" => DelphiSyntaxKind::Until,
302 "uses" => DelphiSyntaxKind::Uses,
303 "var" => DelphiSyntaxKind::Var,
304 "while" => DelphiSyntaxKind::While,
305 "with" => DelphiSyntaxKind::With,
306 _ => DelphiSyntaxKind::Identifier,
307 };
308
309 state.add_token(kind, start, state.get_position());
310 true
311 }
312
313 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
314 let start = state.get_position();
315
316 let patterns: &[(&str, DelphiSyntaxKind)] = &[(":=", DelphiSyntaxKind::Assign), ("<=", DelphiSyntaxKind::LessEqual), (">=", DelphiSyntaxKind::GreaterEqual), ("<>", DelphiSyntaxKind::NotEqual), ("..", DelphiSyntaxKind::DotDot)];
318
319 for (pat, kind) in patterns {
320 if state.consume_if_starts_with(pat) {
321 state.add_token(*kind, start, state.get_position());
322 return true;
323 }
324 }
325
326 if let Some(ch) = state.peek() {
328 let kind = match ch {
329 '+' => Some(DelphiSyntaxKind::Plus),
330 '-' => Some(DelphiSyntaxKind::Minus),
331 '*' => Some(DelphiSyntaxKind::Star),
332 '/' => Some(DelphiSyntaxKind::Slash),
333 '=' => Some(DelphiSyntaxKind::Equal),
334 '<' => Some(DelphiSyntaxKind::Less),
335 '>' => Some(DelphiSyntaxKind::Greater),
336 '.' => Some(DelphiSyntaxKind::Dot),
337 ':' => Some(DelphiSyntaxKind::Colon),
338 '^' => Some(DelphiSyntaxKind::Caret),
339 '@' => Some(DelphiSyntaxKind::At),
340 _ => None,
341 };
342
343 if let Some(k) = kind {
344 state.advance(ch.len_utf8());
345 state.add_token(k, start, state.get_position());
346 return true;
347 }
348 }
349
350 false
351 }
352
353 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
354 let start = state.get_position();
355
356 if let Some(ch) = state.peek() {
357 let kind = match ch {
358 '(' => DelphiSyntaxKind::LeftParen,
359 ')' => DelphiSyntaxKind::RightParen,
360 '[' => DelphiSyntaxKind::LeftBracket,
361 ']' => DelphiSyntaxKind::RightBracket,
362 ',' => DelphiSyntaxKind::Comma,
363 ';' => DelphiSyntaxKind::Semicolon,
364 _ => return false,
365 };
366
367 state.advance(ch.len_utf8());
368 state.add_token(kind, start, state.get_position());
369 true
370 }
371 else {
372 false
373 }
374 }
375}
376
377impl<'config> Lexer<DelphiLanguage> for DelphiLexer<'config> {
378 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DelphiLanguage>) -> LexOutput<DelphiLanguage> {
379 let mut state = LexerState::new(source);
380 let result = self.run(&mut state);
381 if result.is_ok() {
382 state.add_eof();
383 }
384 state.finish_with_cache(result, cache)
385 }
386}