1use crate::{kind::PrologSyntaxKind, language::PrologLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
3
/// Shorthand for the `oak_core` [`LexerState`] parameterised with [`PrologLanguage`],
/// used by every lexing routine in this file.
type State<'s, S> = LexerState<'s, S, PrologLanguage>;
5
/// Lexer for Prolog source text.
///
/// The type has no fields, so it carries no configuration or state of its
/// own; it can be cloned freely and constructed through [`Default`].
#[derive(Clone, Debug, Default)]
pub struct PrologLexer {}
8
9impl PrologLexer {
10 pub fn new(_config: &PrologLanguage) -> Self {
11 Self {}
12 }
13
14 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
15 while state.not_at_end() {
16 if self.skip_whitespace(state) {
17 continue;
18 }
19
20 if self.lex_newline(state) {
21 continue;
22 }
23
24 if self.lex_comment(state) {
25 continue;
26 }
27
28 if self.lex_string(state) {
29 continue;
30 }
31
32 if self.lex_number(state) {
33 continue;
34 }
35
36 if self.lex_atom_or_keyword(state) {
37 continue;
38 }
39
40 if self.lex_variable(state) {
41 continue;
42 }
43
44 if self.lex_operators_and_punctuation(state) {
45 continue;
46 }
47
48 if let Some(ch) = state.peek() {
50 let start_pos = state.get_position();
51 state.advance(ch.len_utf8());
52 state.add_token(PrologSyntaxKind::Error, start_pos, state.get_position());
53 }
54 else {
55 break;
57 }
58 }
59
60 let pos = state.get_position();
62 state.add_token(PrologSyntaxKind::Eof, pos, pos);
63
64 Ok(())
65 }
66
67 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
68 let start_pos = state.get_position();
69
70 while let Some(ch) = state.peek() {
71 if ch == ' ' || ch == '\t' {
72 state.advance(ch.len_utf8());
73 }
74 else {
75 break;
76 }
77 }
78
79 if state.get_position() > start_pos {
80 state.add_token(PrologSyntaxKind::Whitespace, start_pos, state.get_position());
81 true
82 }
83 else {
84 false
85 }
86 }
87
88 fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
89 let start_pos = state.get_position();
90
91 if let Some('\n') = state.peek() {
92 state.advance(1);
93 state.add_token(PrologSyntaxKind::Newline, start_pos, state.get_position());
94 true
95 }
96 else if let Some('\r') = state.peek() {
97 state.advance(1);
98 if let Some('\n') = state.peek() {
99 state.advance(1);
100 }
101 state.add_token(PrologSyntaxKind::Newline, start_pos, state.get_position());
102 true
103 }
104 else {
105 false
106 }
107 }
108
109 fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
110 let start_pos = state.get_position();
111
112 if let Some('%') = state.peek() {
113 state.advance(1);
114 while let Some(ch) = state.peek() {
116 if ch == '\n' || ch == '\r' {
117 break;
118 }
119 state.advance(ch.len_utf8());
120 }
121 state.add_token(PrologSyntaxKind::Comment, start_pos, state.get_position());
122 true
123 }
124 else if let Some('/') = state.peek() {
125 state.advance(1);
126 if let Some('*') = state.peek() {
127 state.advance(1);
128 while let Some(ch) = state.peek() {
130 if ch == '*' {
131 state.advance(1);
132 if let Some('/') = state.peek() {
133 state.advance(1);
134 break;
135 }
136 }
137 else {
138 state.advance(ch.len_utf8());
139 }
140 }
141 state.add_token(PrologSyntaxKind::Comment, start_pos, state.get_position());
142 true
143 }
144 else {
145 state.set_position(start_pos);
147 false
148 }
149 }
150 else {
151 false
152 }
153 }
154
155 fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
156 let start_pos = state.get_position();
157
158 if let Some(quote_char) = state.peek() {
159 if quote_char == '"' || quote_char == '\'' {
160 state.advance(1); let mut escaped = false;
163 while let Some(ch) = state.peek() {
164 if escaped {
165 escaped = false;
166 state.advance(ch.len_utf8());
167 }
168 else if ch == '\\' {
169 escaped = true;
170 state.advance(1);
171 }
172 else if ch == quote_char {
173 state.advance(1); break;
175 }
176 else if ch == '\n' || ch == '\r' {
177 break;
179 }
180 else {
181 state.advance(ch.len_utf8());
182 }
183 }
184
185 state.add_token(PrologSyntaxKind::String, start_pos, state.get_position());
186 true
187 }
188 else {
189 false
190 }
191 }
192 else {
193 false
194 }
195 }
196
197 fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
198 if let Some(ch) = state.peek() {
199 if ch.is_ascii_digit() {
200 let start_pos = state.get_position();
201
202 while let Some(ch) = state.peek() {
204 if ch.is_ascii_digit() {
205 state.advance(1);
206 }
207 else {
208 break;
209 }
210 }
211
212 if let Some('.') = state.peek() {
214 state.advance(1);
215 while let Some(ch) = state.peek() {
217 if ch.is_ascii_digit() {
218 state.advance(1);
219 }
220 else {
221 break;
222 }
223 }
224 }
225
226 if let Some(ch) = state.peek() {
228 if ch == 'e' || ch == 'E' {
229 state.advance(1);
230 if let Some(ch) = state.peek() {
231 if ch == '+' || ch == '-' {
232 state.advance(1);
233 }
234 }
235 while let Some(ch) = state.peek() {
236 if ch.is_ascii_digit() {
237 state.advance(1);
238 }
239 else {
240 break;
241 }
242 }
243 }
244 }
245
246 state.add_token(PrologSyntaxKind::Integer, start_pos, state.get_position());
247 true
248 }
249 else {
250 false
251 }
252 }
253 else {
254 false
255 }
256 }
257
258 fn lex_atom_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
259 if let Some(ch) = state.peek() {
260 if ch.is_ascii_lowercase() || ch == '_' {
261 let start_pos = state.get_position();
262 let mut text = String::new();
263
264 while let Some(ch) = state.peek() {
266 if ch.is_alphanumeric() || ch == '_' {
267 text.push(ch);
268 state.advance(ch.len_utf8());
269 }
270 else {
271 break;
272 }
273 }
274
275 let kind = match text.as_str() {
277 "is" => PrologSyntaxKind::Is,
278 "mod" => PrologSyntaxKind::Modulo,
279 _ => PrologSyntaxKind::Atom,
280 };
281
282 state.add_token(kind, start_pos, state.get_position());
283 true
284 }
285 else {
286 false
287 }
288 }
289 else {
290 false
291 }
292 }
293
294 fn lex_variable<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
295 if let Some(ch) = state.peek() {
296 if ch.is_ascii_uppercase() || ch == '_' {
297 let start_pos = state.get_position();
298
299 while let Some(ch) = state.peek() {
301 if ch.is_alphanumeric() || ch == '_' {
302 state.advance(ch.len_utf8());
303 }
304 else {
305 break;
306 }
307 }
308
309 state.add_token(PrologSyntaxKind::Variable, start_pos, state.get_position());
310 true
311 }
312 else {
313 false
314 }
315 }
316 else {
317 false
318 }
319 }
320
321 fn lex_operators_and_punctuation<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
322 if let Some(ch) = state.peek() {
323 let start_pos = state.get_position();
324
325 let kind = match ch {
326 '+' => {
327 state.advance(1);
328 PrologSyntaxKind::Plus
329 }
330 '-' => {
331 state.advance(1);
332 PrologSyntaxKind::Minus
333 }
334 '*' => {
335 state.advance(1);
336 if let Some('*') = state.peek() {
337 state.advance(1);
338 PrologSyntaxKind::Power
339 }
340 else {
341 PrologSyntaxKind::Multiply
342 }
343 }
344 '/' => {
345 state.advance(1);
346 if let Some('/') = state.peek() {
347 state.advance(1);
348 PrologSyntaxKind::IntDivide
349 }
350 else {
351 PrologSyntaxKind::Divide
352 }
353 }
354 '=' => {
355 state.advance(1);
356 if let Some('=') = state.peek() {
357 state.advance(1);
358 PrologSyntaxKind::Equal
359 }
360 else if let Some(':') = state.peek() {
361 state.advance(1);
362 if let Some('=') = state.peek() {
363 state.advance(1);
364 PrologSyntaxKind::ArithEqual
365 }
366 else {
367 state.set_position(start_pos + 1);
369 PrologSyntaxKind::Unify
370 }
371 }
372 else if let Some('\\') = state.peek() {
373 state.advance(1);
374 if let Some('=') = state.peek() {
375 state.advance(1);
376 PrologSyntaxKind::NotUnify
377 }
378 else {
379 state.set_position(start_pos + 1);
381 PrologSyntaxKind::Unify
382 }
383 }
384 else if let Some('<') = state.peek() {
385 state.advance(1);
386 PrologSyntaxKind::ArithNotEqual
387 }
388 else {
389 PrologSyntaxKind::Unify
390 }
391 }
392 '<' => {
393 state.advance(1);
394 if let Some('=') = state.peek() {
395 state.advance(1);
396 PrologSyntaxKind::LessEqual
397 }
398 else {
399 PrologSyntaxKind::Less
400 }
401 }
402 '>' => {
403 state.advance(1);
404 if let Some('=') = state.peek() {
405 state.advance(1);
406 PrologSyntaxKind::GreaterEqual
407 }
408 else {
409 PrologSyntaxKind::Greater
410 }
411 }
412 '\\' => {
413 state.advance(1);
414 if let Some('=') = state.peek() {
415 state.advance(1);
416 if let Some('=') = state.peek() {
417 state.advance(1);
418 PrologSyntaxKind::NotEqual
419 }
420 else {
421 PrologSyntaxKind::NotUnify
422 }
423 }
424 else {
425 PrologSyntaxKind::BitwiseNot
426 }
427 }
428 '!' => {
429 state.advance(1);
430 PrologSyntaxKind::Cut
431 }
432 '?' => {
433 state.advance(1);
434 PrologSyntaxKind::Question
435 }
436 ':' => {
437 state.advance(1);
438 if let Some('-') = state.peek() {
439 state.advance(1);
440 PrologSyntaxKind::ColonMinus
441 }
442 else {
443 PrologSyntaxKind::Colon
444 }
445 }
446 ';' => {
447 state.advance(1);
448 PrologSyntaxKind::Semicolon
449 }
450 ',' => {
451 state.advance(1);
452 PrologSyntaxKind::Comma
453 }
454 '.' => {
455 state.advance(1);
456 PrologSyntaxKind::Dot
457 }
458 '(' => {
459 state.advance(1);
460 PrologSyntaxKind::LeftParen
461 }
462 ')' => {
463 state.advance(1);
464 PrologSyntaxKind::RightParen
465 }
466 '[' => {
467 state.advance(1);
468 PrologSyntaxKind::LeftBracket
469 }
470 ']' => {
471 state.advance(1);
472 PrologSyntaxKind::RightBracket
473 }
474 '{' => {
475 state.advance(1);
476 PrologSyntaxKind::LeftBrace
477 }
478 '}' => {
479 state.advance(1);
480 PrologSyntaxKind::RightBrace
481 }
482 '|' => {
483 state.advance(1);
484 PrologSyntaxKind::Pipe
485 }
486 '^' => {
487 state.advance(1);
488 PrologSyntaxKind::BitwiseXor
489 }
490 _ => return false,
491 };
492
493 state.add_token(kind, start_pos, state.get_position());
494 true
495 }
496 else {
497 false
498 }
499 }
500}
501
502impl Lexer<PrologLanguage> for PrologLexer {
503 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<PrologLanguage>) -> LexOutput<PrologLanguage> {
504 let mut state = LexerState::new(source);
505 let result = self.run(&mut state);
506 if result.is_ok() {
507 }
510 state.finish_with_cache(result, cache)
511 }
512}