1#![doc = include_str!("readme.md")]
2pub mod token_type;
3pub use token_type::PrologTokenType;
4
5use crate::language::PrologLanguage;
6use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
7
8type State<'s, S> = LexerState<'s, S, PrologLanguage>;
9
/// Hand-written, single-pass lexer for Prolog source text.
///
/// Borrows the language configuration for the lifetime of the lexer; the
/// configuration is currently unused by the scanning routines (hence the
/// `_config` field name).
#[derive(Clone, Debug)]
pub struct PrologLexer<'config> {
    // Kept for future configuration-driven lexing; not read today.
    _config: &'config PrologLanguage,
}
14
15impl<'config> PrologLexer<'config> {
16 pub fn new(config: &'config PrologLanguage) -> Self {
17 Self { _config: config }
18 }
19
20 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
21 while state.not_at_end() {
22 let safe_point = state.get_position();
23
24 if self.skip_whitespace(state) {
25 continue;
26 }
27
28 if self.lex_newline(state) {
29 continue;
30 }
31
32 if self.lex_comment(state) {
33 continue;
34 }
35
36 if self.lex_string(state) {
37 continue;
38 }
39
40 if self.lex_number(state) {
41 continue;
42 }
43
44 if self.lex_atom_or_keyword(state) {
45 continue;
46 }
47
48 if self.lex_variable(state) {
49 continue;
50 }
51
52 if self.lex_operators_and_punctuation(state) {
53 continue;
54 }
55
56 if let Some(ch) = state.peek() {
58 let start_pos = state.get_position();
59 state.advance(ch.len_utf8());
60 state.add_token(PrologTokenType::Error, start_pos, state.get_position())
61 }
62
63 state.advance_if_dead_lock(safe_point)
64 }
65
66 Ok(())
67 }
68
69 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
70 let start_pos = state.get_position();
71
72 while let Some(ch) = state.peek() {
73 if ch == ' ' || ch == '\t' { state.advance(ch.len_utf8()) } else { break }
74 }
75
76 if state.get_position() > start_pos {
77 state.add_token(PrologTokenType::Whitespace, start_pos, state.get_position());
78 true
79 }
80 else {
81 false
82 }
83 }
84
85 fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
86 let start_pos = state.get_position();
87
88 if let Some('\n') = state.peek() {
89 state.advance(1);
90 state.add_token(PrologTokenType::Newline, start_pos, state.get_position());
91 true
92 }
93 else if let Some('\r') = state.peek() {
94 state.advance(1);
95 if let Some('\n') = state.peek() {
96 state.advance(1)
97 }
98 state.add_token(PrologTokenType::Newline, start_pos, state.get_position());
99 true
100 }
101 else {
102 false
103 }
104 }
105
106 fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
107 let start_pos = state.get_position();
108
109 if let Some('%') = state.peek() {
110 state.advance(1);
111 while let Some(ch) = state.peek() {
113 if ch == '\n' || ch == '\r' {
114 break;
115 }
116 state.advance(ch.len_utf8())
117 }
118 state.add_token(PrologTokenType::Comment, start_pos, state.get_position());
119 true
120 }
121 else if let Some('/') = state.peek() {
122 state.advance(1);
123 if let Some('*') = state.peek() {
124 state.advance(1);
125 while let Some(ch) = state.peek() {
127 if ch == '*' {
128 state.advance(1);
129 if let Some('/') = state.peek() {
130 state.advance(1);
131 break;
132 }
133 }
134 else {
135 state.advance(ch.len_utf8())
136 }
137 }
138 state.add_token(PrologTokenType::Comment, start_pos, state.get_position());
139 true
140 }
141 else {
142 state.set_position(start_pos);
144 false
145 }
146 }
147 else {
148 false
149 }
150 }
151
152 fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
153 let start_pos = state.get_position();
154
155 if let Some(quote_char) = state.peek() {
156 if quote_char == '"' || quote_char == '\'' {
157 state.advance(1); let mut escaped = false;
160 while let Some(ch) = state.peek() {
161 if escaped {
162 escaped = false;
163 state.advance(ch.len_utf8())
164 }
165 else if ch == '\\' {
166 escaped = true;
167 state.advance(1)
168 }
169 else if ch == quote_char {
170 state.advance(1); break;
172 }
173 else if ch == '\n' || ch == '\r' {
174 break;
176 }
177 else {
178 state.advance(ch.len_utf8())
179 }
180 }
181
182 state.add_token(PrologTokenType::String, start_pos, state.get_position());
183 true
184 }
185 else {
186 false
187 }
188 }
189 else {
190 false
191 }
192 }
193
194 fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
195 if let Some(ch) = state.peek() {
196 if ch.is_ascii_digit() {
197 let start_pos = state.get_position();
198
199 while let Some(ch) = state.peek() {
201 if ch.is_ascii_digit() { state.advance(1) } else { break }
202 }
203
204 if let Some('.') = state.peek() {
206 state.advance(1);
207 while let Some(ch) = state.peek() {
209 if ch.is_ascii_digit() { state.advance(1) } else { break }
210 }
211 }
212
213 if let Some(ch) = state.peek() {
215 if ch == 'e' || ch == 'E' {
216 state.advance(1);
217 if let Some(ch) = state.peek() {
218 if ch == '+' || ch == '-' {
219 state.advance(1)
220 }
221 }
222 while let Some(ch) = state.peek() {
223 if ch.is_ascii_digit() { state.advance(1) } else { break }
224 }
225 }
226 }
227
228 state.add_token(PrologTokenType::Integer, start_pos, state.get_position());
229 true
230 }
231 else {
232 false
233 }
234 }
235 else {
236 false
237 }
238 }
239
240 fn lex_atom_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
241 if let Some(ch) = state.peek() {
242 if ch.is_ascii_lowercase() || ch == '_' {
243 let start_pos = state.get_position();
244 let mut text = String::new();
245
246 while let Some(ch) = state.peek() {
248 if ch.is_alphanumeric() || ch == '_' {
249 text.push(ch);
250 state.advance(ch.len_utf8())
251 }
252 else {
253 break;
254 }
255 }
256
257 let kind = match text.as_str() {
259 "is" => PrologTokenType::Is,
260 "mod" => PrologTokenType::Modulo,
261 _ => PrologTokenType::Atom,
262 };
263
264 state.add_token(kind, start_pos, state.get_position());
265 true
266 }
267 else {
268 false
269 }
270 }
271 else {
272 false
273 }
274 }
275
276 fn lex_variable<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
277 if let Some(ch) = state.peek() {
278 if ch.is_ascii_uppercase() || ch == '_' {
279 let start_pos = state.get_position();
280
281 while let Some(ch) = state.peek() {
283 if ch.is_alphanumeric() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
284 }
285
286 state.add_token(PrologTokenType::Variable, start_pos, state.get_position());
287 true
288 }
289 else {
290 false
291 }
292 }
293 else {
294 false
295 }
296 }
297
298 fn lex_operators_and_punctuation<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
299 if let Some(ch) = state.peek() {
300 let start_pos = state.get_position();
301
302 let kind = match ch {
303 '+' => {
304 state.advance(1);
305 PrologTokenType::Plus
306 }
307 '-' => {
308 state.advance(1);
309 PrologTokenType::Minus
310 }
311 '*' => {
312 state.advance(1);
313 if let Some('*') = state.peek() {
314 state.advance(1);
315 PrologTokenType::Power
316 }
317 else {
318 PrologTokenType::Multiply
319 }
320 }
321 '/' => {
322 state.advance(1);
323 if let Some('/') = state.peek() {
324 state.advance(1);
325 PrologTokenType::IntDivide
326 }
327 else {
328 PrologTokenType::Divide
329 }
330 }
331 '=' => {
332 state.advance(1);
333 if let Some('=') = state.peek() {
334 state.advance(1);
335 PrologTokenType::Equal
336 }
337 else if let Some(':') = state.peek() {
338 state.advance(1);
339 if let Some('=') = state.peek() {
340 state.advance(1);
341 PrologTokenType::ArithEqual
342 }
343 else {
344 state.set_position(start_pos + 1);
346 PrologTokenType::Unify
347 }
348 }
349 else if let Some('\\') = state.peek() {
350 state.advance(1);
351 if let Some('=') = state.peek() {
352 state.advance(1);
353 PrologTokenType::NotUnify
354 }
355 else {
356 state.set_position(start_pos + 1);
358 PrologTokenType::Unify
359 }
360 }
361 else if let Some('<') = state.peek() {
362 state.advance(1);
363 PrologTokenType::ArithNotEqual
364 }
365 else {
366 PrologTokenType::Unify
367 }
368 }
369 '<' => {
370 state.advance(1);
371 if let Some('=') = state.peek() {
372 state.advance(1);
373 PrologTokenType::LessEqual
374 }
375 else {
376 PrologTokenType::Less
377 }
378 }
379 '>' => {
380 state.advance(1);
381 if let Some('=') = state.peek() {
382 state.advance(1);
383 PrologTokenType::GreaterEqual
384 }
385 else {
386 PrologTokenType::Greater
387 }
388 }
389 '\\' => {
390 state.advance(1);
391 if let Some('=') = state.peek() {
392 state.advance(1);
393 if let Some('=') = state.peek() {
394 state.advance(1);
395 PrologTokenType::NotEqual
396 }
397 else {
398 PrologTokenType::NotUnify
399 }
400 }
401 else {
402 PrologTokenType::BitwiseNot
403 }
404 }
405 '!' => {
406 state.advance(1);
407 PrologTokenType::Cut
408 }
409 '?' => {
410 state.advance(1);
411 PrologTokenType::Question
412 }
413 ':' => {
414 state.advance(1);
415 if let Some('-') = state.peek() {
416 state.advance(1);
417 PrologTokenType::ColonMinus
418 }
419 else {
420 PrologTokenType::Colon
421 }
422 }
423 ';' => {
424 state.advance(1);
425 PrologTokenType::Semicolon
426 }
427 ',' => {
428 state.advance(1);
429 PrologTokenType::Comma
430 }
431 '.' => {
432 state.advance(1);
433 PrologTokenType::Dot
434 }
435 '(' => {
436 state.advance(1);
437 PrologTokenType::LeftParen
438 }
439 ')' => {
440 state.advance(1);
441 PrologTokenType::RightParen
442 }
443 '[' => {
444 state.advance(1);
445 PrologTokenType::LeftBracket
446 }
447 ']' => {
448 state.advance(1);
449 PrologTokenType::RightBracket
450 }
451 '{' => {
452 state.advance(1);
453 PrologTokenType::LeftBrace
454 }
455 '}' => {
456 state.advance(1);
457 PrologTokenType::RightBrace
458 }
459 '|' => {
460 state.advance(1);
461 PrologTokenType::Pipe
462 }
463 '^' => {
464 state.advance(1);
465 PrologTokenType::BitwiseXor
466 }
467 _ => return false,
468 };
469
470 state.add_token(kind, start_pos, state.get_position());
471 true
472 }
473 else {
474 false
475 }
476 }
477}
478
479impl<'config> Lexer<PrologLanguage> for PrologLexer<'config> {
480 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<PrologLanguage>) -> LexOutput<PrologLanguage> {
481 let mut state = State::new_with_cache(source, 0, cache);
482 let result = self.run(&mut state);
483 if result.is_ok() {
484 state.add_eof()
485 }
486 state.finish_with_cache(result, cache)
487 }
488}