1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::DelphiLanguage, lexer::token_type::DelphiTokenType};
5use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
6
7pub(crate) type State<'a, S> = LexerState<'a, S, DelphiLanguage>;
8
/// Hand-written, single-pass lexer for Delphi / Object Pascal source text.
///
/// Borrows the [`DelphiLanguage`] configuration for the lexer's lifetime.
/// NOTE(review): `config` is stored but never read by any method visible in
/// this file — confirm whether it is consulted elsewhere or reserved for
/// future dialect switches.
#[derive(Clone, Debug)]
pub struct DelphiLexer<'config> {
    // Language configuration this lexer was built for.
    config: &'config DelphiLanguage,
}
14
15impl<'config> DelphiLexer<'config> {
    /// Creates a lexer that borrows the given language configuration.
    pub fn new(config: &'config DelphiLanguage) -> Self {
        Self { config }
    }
20
    /// Main tokenization loop: tries each sub-lexer in priority order until
    /// the source is exhausted.
    ///
    /// The order is significant: comments must be tried before operators
    /// (`//` vs `/`), and multi-character operators before single-character
    /// ones. Each sub-lexer returns `true` if it consumed input and emitted
    /// a token, in which case the loop restarts from the new position.
    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
        while state.not_at_end() {
            // Position at the top of this iteration; used below to detect a
            // sub-lexer that reported progress without actually advancing.
            let safe_point = state.get_position();

            if self.skip_whitespace(state) {
                continue;
            }

            if self.skip_comment(state) {
                continue;
            }

            if self.lex_string_literal(state) {
                continue;
            }

            if self.lex_number_literal(state) {
                continue;
            }

            if self.lex_identifier_or_keyword(state) {
                continue;
            }

            if self.lex_operators(state) {
                continue;
            }

            if self.lex_single_char_tokens(state) {
                continue;
            }

            // Nothing matched: consume exactly one character and emit an
            // Error token so the lexer always makes progress and every byte
            // of the input is covered by some token.
            let start_pos = state.get_position();
            if let Some(ch) = state.peek() {
                state.advance(ch.len_utf8());
                state.add_token(DelphiTokenType::Error, start_pos, state.get_position())
            }

            // Safety net: force the position forward if nothing above did.
            state.advance_if_dead_lock(safe_point)
        }

        Ok(())
    }
65
66 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
67 let start_pos = state.get_position();
68 let mut consumed = false;
69 while let Some(ch) = state.peek() {
70 if ch.is_whitespace() {
71 consumed = true;
72 state.advance(ch.len_utf8())
73 }
74 else {
75 break;
76 }
77 }
78 if consumed {
79 state.add_token(DelphiTokenType::Whitespace, start_pos, state.get_position());
80 true
81 }
82 else {
83 false
84 }
85 }
86
    /// Recognizes the three Delphi comment forms:
    /// `// …` to end of line, `{ … }`, and `(* … *)`.
    ///
    /// An unterminated block comment runs to end of input and is still
    /// emitted as a `BlockComment` token (no error is reported).
    ///
    /// NOTE(review): the `{ … }` branch tracks a nesting depth, but standard
    /// Delphi does not nest same-delimiter comments — confirm the nesting
    /// behavior is intentional for this dialect.
    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start = state.get_position();

        // Line comment: everything up to (but not including) the newline.
        if state.consume_if_starts_with("//") {
            while let Some(ch) = state.peek() {
                if ch == '\n' || ch == '\r' {
                    break;
                }
                state.advance(ch.len_utf8())
            }
            state.add_token(DelphiTokenType::LineComment, start, state.get_position());
            return true;
        }

        // Brace comment, scanned with a nesting counter (see NOTE above).
        if state.consume_if_starts_with("{") {
            let mut depth = 1usize;
            while let Some(ch) = state.peek() {
                if ch == '{' {
                    depth += 1
                }
                else if ch == '}' {
                    depth -= 1;
                    if depth == 0 {
                        // Consume the final '}' ourselves, then stop; the
                        // trailing advance below is skipped by the break.
                        state.advance(1);
                        break;
                    }
                }
                state.advance(ch.len_utf8())
            }
            state.add_token(DelphiTokenType::BlockComment, start, state.get_position());
            return true;
        }

        // Parenthesis-star comment: scan until "*)" is consumed or EOF.
        if state.consume_if_starts_with("(*") {
            while let Some(ch) = state.peek() {
                if state.consume_if_starts_with("*)") {
                    break;
                }
                state.advance(ch.len_utf8())
            }
            state.add_token(DelphiTokenType::BlockComment, start, state.get_position());
            return true;
        }

        false
    }
136
137 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
138 let start = state.get_position();
139 if let Some('\'') = state.peek() {
140 state.advance(1);
141 while let Some(ch) = state.peek() {
142 if ch == '\'' {
143 state.advance(1);
144 if state.peek() == Some('\'') {
145 state.advance(1);
147 continue;
148 }
149 break;
150 }
151 state.advance(ch.len_utf8())
152 }
153 state.add_token(DelphiTokenType::String, start, state.get_position());
154 return true;
155 }
156 false
157 }
158
159 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
160 let start = state.get_position();
161 let first = match state.peek() {
162 Some(c) => c,
163 None => return false,
164 };
165
166 if !first.is_ascii_digit() && first != '$' {
167 return false;
168 }
169
170 let mut is_float = false;
171
172 if first == '$' {
174 state.advance(1);
175 while let Some(c) = state.peek() {
176 if c.is_ascii_hexdigit() { state.advance(1) } else { break }
177 }
178 }
179 else {
180 state.advance(1);
182 while let Some(c) = state.peek() {
183 if c.is_ascii_digit() { state.advance(1) } else { break }
184 }
185
186 if state.peek() == Some('.') {
188 let next = state.peek_next_n(1);
189 if next.map(|c| c.is_ascii_digit()).unwrap_or(false) {
190 is_float = true;
191 state.advance(1); while let Some(c) = state.peek() {
193 if c.is_ascii_digit() { state.advance(1) } else { break }
194 }
195 }
196 }
197
198 if let Some(c) = state.peek() {
200 if c == 'e' || c == 'E' {
201 let next = state.peek_next_n(1);
202 if next == Some('+') || next == Some('-') || next.map(|d| d.is_ascii_digit()).unwrap_or(false) {
203 is_float = true;
204 state.advance(1);
205 if let Some(sign) = state.peek() {
206 if sign == '+' || sign == '-' {
207 state.advance(1)
208 }
209 }
210 while let Some(d) = state.peek() {
211 if d.is_ascii_digit() { state.advance(1) } else { break }
212 }
213 }
214 }
215 }
216 }
217
218 state.add_token(if is_float { DelphiTokenType::Float } else { DelphiTokenType::Number }, start, state.get_position());
219 true
220 }
221
222 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
223 let start = state.get_position();
224 let ch = match state.peek() {
225 Some(c) => c,
226 None => return false,
227 };
228
229 if !(ch.is_ascii_alphabetic() || ch == '_') {
230 return false;
231 }
232
233 state.advance(ch.len_utf8());
234 while let Some(c) = state.peek() {
235 if c.is_ascii_alphanumeric() || c == '_' { state.advance(c.len_utf8()) } else { break }
236 }
237
238 let end = state.get_position();
239 let text = state.get_text_in((start..end).into());
240 let kind = match text.to_lowercase().as_str() {
241 "and" => DelphiTokenType::And_,
242 "array" => DelphiTokenType::Array,
243 "as" => DelphiTokenType::As_,
244 "begin" => DelphiTokenType::Begin,
245 "case" => DelphiTokenType::Case,
246 "class" => DelphiTokenType::Class,
247 "const" => DelphiTokenType::Const,
248 "div" => DelphiTokenType::Div,
249 "do" => DelphiTokenType::Do,
250 "downto" => DelphiTokenType::Downto,
251 "else" => DelphiTokenType::Else,
252 "end" => DelphiTokenType::End,
253 "except" => DelphiTokenType::Except,
254 "false" => DelphiTokenType::False_,
255 "finally" => DelphiTokenType::Finally,
256 "for" => DelphiTokenType::For,
257 "function" => DelphiTokenType::Function,
258 "if" => DelphiTokenType::If,
259 "implementation" => DelphiTokenType::Implementation,
260 "in" => DelphiTokenType::In_,
261 "interface" => DelphiTokenType::Interface,
262 "is" => DelphiTokenType::Is_,
263 "mod" => DelphiTokenType::Mod,
264 "nil" => DelphiTokenType::Nil,
265 "not" => DelphiTokenType::Not_,
266 "object" => DelphiTokenType::Object,
267 "of" => DelphiTokenType::Of,
268 "or" => DelphiTokenType::Or_,
269 "procedure" => DelphiTokenType::Procedure,
270 "program" => DelphiTokenType::Program,
271 "record" => DelphiTokenType::Record,
272 "repeat" => DelphiTokenType::Repeat,
273 "set" => DelphiTokenType::Set,
274 "then" => DelphiTokenType::Then,
275 "to" => DelphiTokenType::To,
276 "true" => DelphiTokenType::True_,
277 "try" => DelphiTokenType::Try,
278 "type" => DelphiTokenType::Type,
279 "unit" => DelphiTokenType::Unit,
280 "until" => DelphiTokenType::Until,
281 "uses" => DelphiTokenType::Uses,
282 "var" => DelphiTokenType::Var,
283 "while" => DelphiTokenType::While,
284 "with" => DelphiTokenType::With,
285 _ => DelphiTokenType::Identifier,
286 };
287
288 state.add_token(kind, start, state.get_position());
289 true
290 }
291
292 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
293 let start = state.get_position();
294
295 let patterns: &[(&str, DelphiTokenType)] = &[(":=", DelphiTokenType::Assign), ("<=", DelphiTokenType::LessEqual), (">=", DelphiTokenType::GreaterEqual), ("<>", DelphiTokenType::NotEqual), ("..", DelphiTokenType::DotDot)];
297
298 for (pat, kind) in patterns {
299 if state.consume_if_starts_with(pat) {
300 state.add_token(*kind, start, state.get_position());
301 return true;
302 }
303 }
304
305 if let Some(ch) = state.peek() {
307 let kind = match ch {
308 '+' => Some(DelphiTokenType::Plus),
309 '-' => Some(DelphiTokenType::Minus),
310 '*' => Some(DelphiTokenType::Star),
311 '/' => Some(DelphiTokenType::Slash),
312 '=' => Some(DelphiTokenType::Equal),
313 '<' => Some(DelphiTokenType::Less),
314 '>' => Some(DelphiTokenType::Greater),
315 '.' => Some(DelphiTokenType::Dot),
316 ':' => Some(DelphiTokenType::Colon),
317 '^' => Some(DelphiTokenType::Caret),
318 '@' => Some(DelphiTokenType::At),
319 _ => None,
320 };
321
322 if let Some(k) = kind {
323 state.advance(ch.len_utf8());
324 state.add_token(k, start, state.get_position());
325 return true;
326 }
327 }
328
329 false
330 }
331
332 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
333 let start = state.get_position();
334
335 if let Some(ch) = state.peek() {
336 let kind = match ch {
337 '(' => DelphiTokenType::LeftParen,
338 ')' => DelphiTokenType::RightParen,
339 '[' => DelphiTokenType::LeftBracket,
340 ']' => DelphiTokenType::RightBracket,
341 ',' => DelphiTokenType::Comma,
342 ';' => DelphiTokenType::Semicolon,
343 _ => return false,
344 };
345
346 state.advance(ch.len_utf8());
347 state.add_token(kind, start, state.get_position());
348 true
349 }
350 else {
351 false
352 }
353 }
354}
355
impl<'config> Lexer<DelphiLanguage> for DelphiLexer<'config> {
    /// Tokenizes `source` from offset 0 and returns the token stream.
    ///
    /// NOTE(review): `_edits` is ignored, so every call relexes the full
    /// source — confirm incremental relexing is intentionally unimplemented.
    /// The EOF token is appended only on success; the cache receives the
    /// result either way via `finish_with_cache`.
    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DelphiLanguage>) -> LexOutput<DelphiLanguage> {
        let mut state = State::new_with_cache(source, 0, cache);
        let result = self.run(&mut state);
        if result.is_ok() {
            state.add_eof()
        }
        state.finish_with_cache(result, cache)
    }
}