1use crate::{kind::PrologSyntaxKind, language::PrologLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
3
/// Shorthand for `LexerState` fixed to `PrologLanguage`; the source lifetime `'s`
/// and source type `S` are passed through unchanged.
type State<'s, S> = LexerState<'s, S, PrologLanguage>;
5
/// Lexer that splits Prolog source text into `PrologSyntaxKind` tokens.
///
/// The lexer only borrows its language configuration, so it is cheap to clone.
#[derive(Clone, Debug)]
pub struct PrologLexer<'config> {
    /// Borrowed language configuration. It is stored by `new` but not read by
    /// any of the lexing routines in this file (hence the leading underscore).
    _config: &'config PrologLanguage,
}
10
11impl<'config> PrologLexer<'config> {
12 pub fn new(config: &'config PrologLanguage) -> Self {
13 Self { _config: config }
14 }
15
16 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
17 while state.not_at_end() {
18 let safe_point = state.get_position();
19
20 if self.skip_whitespace(state) {
21 continue;
22 }
23
24 if self.lex_newline(state) {
25 continue;
26 }
27
28 if self.lex_comment(state) {
29 continue;
30 }
31
32 if self.lex_string(state) {
33 continue;
34 }
35
36 if self.lex_number(state) {
37 continue;
38 }
39
40 if self.lex_atom_or_keyword(state) {
41 continue;
42 }
43
44 if self.lex_variable(state) {
45 continue;
46 }
47
48 if self.lex_operators_and_punctuation(state) {
49 continue;
50 }
51
52 if let Some(ch) = state.peek() {
54 let start_pos = state.get_position();
55 state.advance(ch.len_utf8());
56 state.add_token(PrologSyntaxKind::Error, start_pos, state.get_position());
57 }
58
59 state.advance_if_dead_lock(safe_point);
60 }
61
62 Ok(())
63 }
64
65 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
66 let start_pos = state.get_position();
67
68 while let Some(ch) = state.peek() {
69 if ch == ' ' || ch == '\t' {
70 state.advance(ch.len_utf8());
71 }
72 else {
73 break;
74 }
75 }
76
77 if state.get_position() > start_pos {
78 state.add_token(PrologSyntaxKind::Whitespace, start_pos, state.get_position());
79 true
80 }
81 else {
82 false
83 }
84 }
85
86 fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
87 let start_pos = state.get_position();
88
89 if let Some('\n') = state.peek() {
90 state.advance(1);
91 state.add_token(PrologSyntaxKind::Newline, start_pos, state.get_position());
92 true
93 }
94 else if let Some('\r') = state.peek() {
95 state.advance(1);
96 if let Some('\n') = state.peek() {
97 state.advance(1);
98 }
99 state.add_token(PrologSyntaxKind::Newline, start_pos, state.get_position());
100 true
101 }
102 else {
103 false
104 }
105 }
106
107 fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
108 let start_pos = state.get_position();
109
110 if let Some('%') = state.peek() {
111 state.advance(1);
112 while let Some(ch) = state.peek() {
114 if ch == '\n' || ch == '\r' {
115 break;
116 }
117 state.advance(ch.len_utf8());
118 }
119 state.add_token(PrologSyntaxKind::Comment, start_pos, state.get_position());
120 true
121 }
122 else if let Some('/') = state.peek() {
123 state.advance(1);
124 if let Some('*') = state.peek() {
125 state.advance(1);
126 while let Some(ch) = state.peek() {
128 if ch == '*' {
129 state.advance(1);
130 if let Some('/') = state.peek() {
131 state.advance(1);
132 break;
133 }
134 }
135 else {
136 state.advance(ch.len_utf8());
137 }
138 }
139 state.add_token(PrologSyntaxKind::Comment, start_pos, state.get_position());
140 true
141 }
142 else {
143 state.set_position(start_pos);
145 false
146 }
147 }
148 else {
149 false
150 }
151 }
152
153 fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
154 let start_pos = state.get_position();
155
156 if let Some(quote_char) = state.peek() {
157 if quote_char == '"' || quote_char == '\'' {
158 state.advance(1); let mut escaped = false;
161 while let Some(ch) = state.peek() {
162 if escaped {
163 escaped = false;
164 state.advance(ch.len_utf8());
165 }
166 else if ch == '\\' {
167 escaped = true;
168 state.advance(1);
169 }
170 else if ch == quote_char {
171 state.advance(1); break;
173 }
174 else if ch == '\n' || ch == '\r' {
175 break;
177 }
178 else {
179 state.advance(ch.len_utf8());
180 }
181 }
182
183 state.add_token(PrologSyntaxKind::String, start_pos, state.get_position());
184 true
185 }
186 else {
187 false
188 }
189 }
190 else {
191 false
192 }
193 }
194
195 fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
196 if let Some(ch) = state.peek() {
197 if ch.is_ascii_digit() {
198 let start_pos = state.get_position();
199
200 while let Some(ch) = state.peek() {
202 if ch.is_ascii_digit() {
203 state.advance(1);
204 }
205 else {
206 break;
207 }
208 }
209
210 if let Some('.') = state.peek() {
212 state.advance(1);
213 while let Some(ch) = state.peek() {
215 if ch.is_ascii_digit() {
216 state.advance(1);
217 }
218 else {
219 break;
220 }
221 }
222 }
223
224 if let Some(ch) = state.peek() {
226 if ch == 'e' || ch == 'E' {
227 state.advance(1);
228 if let Some(ch) = state.peek() {
229 if ch == '+' || ch == '-' {
230 state.advance(1);
231 }
232 }
233 while let Some(ch) = state.peek() {
234 if ch.is_ascii_digit() {
235 state.advance(1);
236 }
237 else {
238 break;
239 }
240 }
241 }
242 }
243
244 state.add_token(PrologSyntaxKind::Integer, start_pos, state.get_position());
245 true
246 }
247 else {
248 false
249 }
250 }
251 else {
252 false
253 }
254 }
255
256 fn lex_atom_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
257 if let Some(ch) = state.peek() {
258 if ch.is_ascii_lowercase() || ch == '_' {
259 let start_pos = state.get_position();
260 let mut text = String::new();
261
262 while let Some(ch) = state.peek() {
264 if ch.is_alphanumeric() || ch == '_' {
265 text.push(ch);
266 state.advance(ch.len_utf8());
267 }
268 else {
269 break;
270 }
271 }
272
273 let kind = match text.as_str() {
275 "is" => PrologSyntaxKind::Is,
276 "mod" => PrologSyntaxKind::Modulo,
277 _ => PrologSyntaxKind::Atom,
278 };
279
280 state.add_token(kind, start_pos, state.get_position());
281 true
282 }
283 else {
284 false
285 }
286 }
287 else {
288 false
289 }
290 }
291
292 fn lex_variable<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
293 if let Some(ch) = state.peek() {
294 if ch.is_ascii_uppercase() || ch == '_' {
295 let start_pos = state.get_position();
296
297 while let Some(ch) = state.peek() {
299 if ch.is_alphanumeric() || ch == '_' {
300 state.advance(ch.len_utf8());
301 }
302 else {
303 break;
304 }
305 }
306
307 state.add_token(PrologSyntaxKind::Variable, start_pos, state.get_position());
308 true
309 }
310 else {
311 false
312 }
313 }
314 else {
315 false
316 }
317 }
318
319 fn lex_operators_and_punctuation<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
320 if let Some(ch) = state.peek() {
321 let start_pos = state.get_position();
322
323 let kind = match ch {
324 '+' => {
325 state.advance(1);
326 PrologSyntaxKind::Plus
327 }
328 '-' => {
329 state.advance(1);
330 PrologSyntaxKind::Minus
331 }
332 '*' => {
333 state.advance(1);
334 if let Some('*') = state.peek() {
335 state.advance(1);
336 PrologSyntaxKind::Power
337 }
338 else {
339 PrologSyntaxKind::Multiply
340 }
341 }
342 '/' => {
343 state.advance(1);
344 if let Some('/') = state.peek() {
345 state.advance(1);
346 PrologSyntaxKind::IntDivide
347 }
348 else {
349 PrologSyntaxKind::Divide
350 }
351 }
352 '=' => {
353 state.advance(1);
354 if let Some('=') = state.peek() {
355 state.advance(1);
356 PrologSyntaxKind::Equal
357 }
358 else if let Some(':') = state.peek() {
359 state.advance(1);
360 if let Some('=') = state.peek() {
361 state.advance(1);
362 PrologSyntaxKind::ArithEqual
363 }
364 else {
365 state.set_position(start_pos + 1);
367 PrologSyntaxKind::Unify
368 }
369 }
370 else if let Some('\\') = state.peek() {
371 state.advance(1);
372 if let Some('=') = state.peek() {
373 state.advance(1);
374 PrologSyntaxKind::NotUnify
375 }
376 else {
377 state.set_position(start_pos + 1);
379 PrologSyntaxKind::Unify
380 }
381 }
382 else if let Some('<') = state.peek() {
383 state.advance(1);
384 PrologSyntaxKind::ArithNotEqual
385 }
386 else {
387 PrologSyntaxKind::Unify
388 }
389 }
390 '<' => {
391 state.advance(1);
392 if let Some('=') = state.peek() {
393 state.advance(1);
394 PrologSyntaxKind::LessEqual
395 }
396 else {
397 PrologSyntaxKind::Less
398 }
399 }
400 '>' => {
401 state.advance(1);
402 if let Some('=') = state.peek() {
403 state.advance(1);
404 PrologSyntaxKind::GreaterEqual
405 }
406 else {
407 PrologSyntaxKind::Greater
408 }
409 }
410 '\\' => {
411 state.advance(1);
412 if let Some('=') = state.peek() {
413 state.advance(1);
414 if let Some('=') = state.peek() {
415 state.advance(1);
416 PrologSyntaxKind::NotEqual
417 }
418 else {
419 PrologSyntaxKind::NotUnify
420 }
421 }
422 else {
423 PrologSyntaxKind::BitwiseNot
424 }
425 }
426 '!' => {
427 state.advance(1);
428 PrologSyntaxKind::Cut
429 }
430 '?' => {
431 state.advance(1);
432 PrologSyntaxKind::Question
433 }
434 ':' => {
435 state.advance(1);
436 if let Some('-') = state.peek() {
437 state.advance(1);
438 PrologSyntaxKind::ColonMinus
439 }
440 else {
441 PrologSyntaxKind::Colon
442 }
443 }
444 ';' => {
445 state.advance(1);
446 PrologSyntaxKind::Semicolon
447 }
448 ',' => {
449 state.advance(1);
450 PrologSyntaxKind::Comma
451 }
452 '.' => {
453 state.advance(1);
454 PrologSyntaxKind::Dot
455 }
456 '(' => {
457 state.advance(1);
458 PrologSyntaxKind::LeftParen
459 }
460 ')' => {
461 state.advance(1);
462 PrologSyntaxKind::RightParen
463 }
464 '[' => {
465 state.advance(1);
466 PrologSyntaxKind::LeftBracket
467 }
468 ']' => {
469 state.advance(1);
470 PrologSyntaxKind::RightBracket
471 }
472 '{' => {
473 state.advance(1);
474 PrologSyntaxKind::LeftBrace
475 }
476 '}' => {
477 state.advance(1);
478 PrologSyntaxKind::RightBrace
479 }
480 '|' => {
481 state.advance(1);
482 PrologSyntaxKind::Pipe
483 }
484 '^' => {
485 state.advance(1);
486 PrologSyntaxKind::BitwiseXor
487 }
488 _ => return false,
489 };
490
491 state.add_token(kind, start_pos, state.get_position());
492 true
493 }
494 else {
495 false
496 }
497 }
498}
499
500impl<'config> Lexer<PrologLanguage> for PrologLexer<'config> {
501 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<PrologLanguage>) -> LexOutput<PrologLanguage> {
502 let mut state = State::new_with_cache(source, 0, cache);
503 let result = self.run(&mut state);
504 if result.is_ok() {
505 state.add_eof();
506 }
507 state.finish_with_cache(result, cache)
508 }
509}