1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::DelphiLanguage, lexer::token_type::DelphiTokenType};
5use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
6
7type State<'a, S> = LexerState<'a, S, DelphiLanguage>;
8
/// Hand-written lexer for the Delphi language.
///
/// Borrows the language configuration for the `'config` lifetime.
#[derive(Clone, Debug)]
pub struct DelphiLexer<'config> {
    // NOTE(review): the configuration is not read by any scanning routine in
    // this file yet (hence the `_` prefix); kept for configuration-driven
    // lexing later.
    _config: &'config DelphiLanguage,
}
14
15impl<'config> DelphiLexer<'config> {
16 pub fn new(config: &'config DelphiLanguage) -> Self {
17 Self { _config: config }
18 }
19
20 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
21 while state.not_at_end() {
22 let safe_point = state.get_position();
23
24 if self.skip_whitespace(state) {
25 continue;
26 }
27
28 if self.skip_comment(state) {
29 continue;
30 }
31
32 if self.lex_string_literal(state) {
33 continue;
34 }
35
36 if self.lex_number_literal(state) {
37 continue;
38 }
39
40 if self.lex_identifier_or_keyword(state) {
41 continue;
42 }
43
44 if self.lex_operators(state) {
45 continue;
46 }
47
48 if self.lex_single_char_tokens(state) {
49 continue;
50 }
51
52 let start_pos = state.get_position();
54 if let Some(ch) = state.peek() {
55 state.advance(ch.len_utf8());
56 state.add_token(DelphiTokenType::Error, start_pos, state.get_position())
57 }
58
59 state.advance_if_dead_lock(safe_point)
60 }
61
62 Ok(())
63 }
64
65 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
66 let start_pos = state.get_position();
67 let mut consumed = false;
68 while let Some(ch) = state.peek() {
69 if ch.is_whitespace() {
70 consumed = true;
71 state.advance(ch.len_utf8())
72 }
73 else {
74 break;
75 }
76 }
77 if consumed {
78 state.add_token(DelphiTokenType::Whitespace, start_pos, state.get_position());
79 true
80 }
81 else {
82 false
83 }
84 }
85
86 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
87 let start = state.get_position();
88
89 if state.consume_if_starts_with("//") {
91 while let Some(ch) = state.peek() {
92 if ch == '\n' || ch == '\r' {
93 break;
94 }
95 state.advance(ch.len_utf8())
96 }
97 state.add_token(DelphiTokenType::LineComment, start, state.get_position());
98 return true;
99 }
100
101 if state.consume_if_starts_with("{") {
103 let mut depth = 1usize;
104 while let Some(ch) = state.peek() {
105 if ch == '{' {
106 depth += 1
107 }
108 else if ch == '}' {
109 depth -= 1;
110 if depth == 0 {
111 state.advance(1);
112 break;
113 }
114 }
115 state.advance(ch.len_utf8())
116 }
117 state.add_token(DelphiTokenType::BlockComment, start, state.get_position());
118 return true;
119 }
120
121 if state.consume_if_starts_with("(*") {
123 while let Some(ch) = state.peek() {
124 if state.consume_if_starts_with("*)") {
125 break;
126 }
127 state.advance(ch.len_utf8())
128 }
129 state.add_token(DelphiTokenType::BlockComment, start, state.get_position());
130 return true;
131 }
132
133 false
134 }
135
136 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
137 let start = state.get_position();
138 if let Some('\'') = state.peek() {
139 state.advance(1);
140 while let Some(ch) = state.peek() {
141 if ch == '\'' {
142 state.advance(1);
143 if state.peek() == Some('\'') {
144 state.advance(1);
146 continue;
147 }
148 break;
149 }
150 state.advance(ch.len_utf8())
151 }
152 state.add_token(DelphiTokenType::String, start, state.get_position());
153 return true;
154 }
155 false
156 }
157
158 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
159 let start = state.get_position();
160 let first = match state.peek() {
161 Some(c) => c,
162 None => return false,
163 };
164
165 if !first.is_ascii_digit() && first != '$' {
166 return false;
167 }
168
169 let mut is_float = false;
170
171 if first == '$' {
173 state.advance(1);
174 while let Some(c) = state.peek() {
175 if c.is_ascii_hexdigit() { state.advance(1) } else { break }
176 }
177 }
178 else {
179 state.advance(1);
181 while let Some(c) = state.peek() {
182 if c.is_ascii_digit() { state.advance(1) } else { break }
183 }
184
185 if state.peek() == Some('.') {
187 let next = state.peek_next_n(1);
188 if next.map(|c| c.is_ascii_digit()).unwrap_or(false) {
189 is_float = true;
190 state.advance(1); while let Some(c) = state.peek() {
192 if c.is_ascii_digit() { state.advance(1) } else { break }
193 }
194 }
195 }
196
197 if let Some(c) = state.peek() {
199 if c == 'e' || c == 'E' {
200 let next = state.peek_next_n(1);
201 if next == Some('+') || next == Some('-') || next.map(|d| d.is_ascii_digit()).unwrap_or(false) {
202 is_float = true;
203 state.advance(1);
204 if let Some(sign) = state.peek() {
205 if sign == '+' || sign == '-' {
206 state.advance(1)
207 }
208 }
209 while let Some(d) = state.peek() {
210 if d.is_ascii_digit() { state.advance(1) } else { break }
211 }
212 }
213 }
214 }
215 }
216
217 state.add_token(if is_float { DelphiTokenType::Float } else { DelphiTokenType::Number }, start, state.get_position());
218 true
219 }
220
221 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
222 let start = state.get_position();
223 let ch = match state.peek() {
224 Some(c) => c,
225 None => return false,
226 };
227
228 if !(ch.is_ascii_alphabetic() || ch == '_') {
229 return false;
230 }
231
232 state.advance(ch.len_utf8());
233 while let Some(c) = state.peek() {
234 if c.is_ascii_alphanumeric() || c == '_' { state.advance(c.len_utf8()) } else { break }
235 }
236
237 let end = state.get_position();
238 let text = state.get_text_in((start..end).into());
239 let kind = match text.to_lowercase().as_str() {
240 "and" => DelphiTokenType::And_,
241 "array" => DelphiTokenType::Array,
242 "as" => DelphiTokenType::As_,
243 "begin" => DelphiTokenType::Begin,
244 "case" => DelphiTokenType::Case,
245 "class" => DelphiTokenType::Class,
246 "const" => DelphiTokenType::Const,
247 "div" => DelphiTokenType::Div,
248 "do" => DelphiTokenType::Do,
249 "downto" => DelphiTokenType::Downto,
250 "else" => DelphiTokenType::Else,
251 "end" => DelphiTokenType::End,
252 "except" => DelphiTokenType::Except,
253 "false" => DelphiTokenType::False_,
254 "finally" => DelphiTokenType::Finally,
255 "for" => DelphiTokenType::For,
256 "function" => DelphiTokenType::Function,
257 "if" => DelphiTokenType::If,
258 "implementation" => DelphiTokenType::Implementation,
259 "in" => DelphiTokenType::In_,
260 "interface" => DelphiTokenType::Interface,
261 "is" => DelphiTokenType::Is_,
262 "mod" => DelphiTokenType::Mod,
263 "nil" => DelphiTokenType::Nil,
264 "not" => DelphiTokenType::Not_,
265 "object" => DelphiTokenType::Object,
266 "of" => DelphiTokenType::Of,
267 "or" => DelphiTokenType::Or_,
268 "procedure" => DelphiTokenType::Procedure,
269 "program" => DelphiTokenType::Program,
270 "record" => DelphiTokenType::Record,
271 "repeat" => DelphiTokenType::Repeat,
272 "set" => DelphiTokenType::Set,
273 "then" => DelphiTokenType::Then,
274 "to" => DelphiTokenType::To,
275 "true" => DelphiTokenType::True_,
276 "try" => DelphiTokenType::Try,
277 "type" => DelphiTokenType::Type,
278 "unit" => DelphiTokenType::Unit,
279 "until" => DelphiTokenType::Until,
280 "uses" => DelphiTokenType::Uses,
281 "var" => DelphiTokenType::Var,
282 "while" => DelphiTokenType::While,
283 "with" => DelphiTokenType::With,
284 _ => DelphiTokenType::Identifier,
285 };
286
287 state.add_token(kind, start, state.get_position());
288 true
289 }
290
291 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
292 let start = state.get_position();
293
294 let patterns: &[(&str, DelphiTokenType)] = &[(":=", DelphiTokenType::Assign), ("<=", DelphiTokenType::LessEqual), (">=", DelphiTokenType::GreaterEqual), ("<>", DelphiTokenType::NotEqual), ("..", DelphiTokenType::DotDot)];
296
297 for (pat, kind) in patterns {
298 if state.consume_if_starts_with(pat) {
299 state.add_token(*kind, start, state.get_position());
300 return true;
301 }
302 }
303
304 if let Some(ch) = state.peek() {
306 let kind = match ch {
307 '+' => Some(DelphiTokenType::Plus),
308 '-' => Some(DelphiTokenType::Minus),
309 '*' => Some(DelphiTokenType::Star),
310 '/' => Some(DelphiTokenType::Slash),
311 '=' => Some(DelphiTokenType::Equal),
312 '<' => Some(DelphiTokenType::Less),
313 '>' => Some(DelphiTokenType::Greater),
314 '.' => Some(DelphiTokenType::Dot),
315 ':' => Some(DelphiTokenType::Colon),
316 '^' => Some(DelphiTokenType::Caret),
317 '@' => Some(DelphiTokenType::At),
318 _ => None,
319 };
320
321 if let Some(k) = kind {
322 state.advance(ch.len_utf8());
323 state.add_token(k, start, state.get_position());
324 return true;
325 }
326 }
327
328 false
329 }
330
331 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
332 let start = state.get_position();
333
334 if let Some(ch) = state.peek() {
335 let kind = match ch {
336 '(' => DelphiTokenType::LeftParen,
337 ')' => DelphiTokenType::RightParen,
338 '[' => DelphiTokenType::LeftBracket,
339 ']' => DelphiTokenType::RightBracket,
340 ',' => DelphiTokenType::Comma,
341 ';' => DelphiTokenType::Semicolon,
342 _ => return false,
343 };
344
345 state.advance(ch.len_utf8());
346 state.add_token(kind, start, state.get_position());
347 true
348 }
349 else {
350 false
351 }
352 }
353}
354
355impl<'config> Lexer<DelphiLanguage> for DelphiLexer<'config> {
356 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DelphiLanguage>) -> LexOutput<DelphiLanguage> {
357 let mut state = State::new_with_cache(source, 0, cache);
358 let result = self.run(&mut state);
359 if result.is_ok() {
360 state.add_eof()
361 }
362 state.finish_with_cache(result, cache)
363 }
364}