1use crate::{kind::LeanSyntaxKind, language::LeanLanguage};
6use oak_core::{IncrementalCache, Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
7
/// Shorthand for the framework lexer state specialized to the Lean language.
type State<S> = LexerState<S, LeanLanguage>;
9
/// Hand-written lexer for Lean source text.
pub struct LeanLexer<'config> {
    // Language configuration. NOTE(review): not consulted by any routine in
    // this file — presumably kept for future configurable lexing behavior.
    config: &'config LeanLanguage,
}
14
15impl<'config> LeanLexer<'config> {
16 pub fn new(config: &'config LeanLanguage) -> Self {
18 Self { config }
19 }
20
21 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
23 let start_pos = state.get_position();
24
25 while let Some(ch) = state.peek() {
26 if ch == ' ' || ch == '\t' {
27 state.advance(ch.len_utf8());
28 }
29 else {
30 break;
31 }
32 }
33
34 if state.get_position() > start_pos {
35 state.add_token(LeanSyntaxKind::Whitespace, start_pos, state.get_position());
36 true
37 }
38 else {
39 false
40 }
41 }
42
43 fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
45 let start_pos = state.get_position();
46
47 if let Some('\n') = state.peek() {
48 state.advance(1);
49 state.add_token(LeanSyntaxKind::Whitespace, start_pos, state.get_position());
50 true
51 }
52 else if let Some('\r') = state.peek() {
53 state.advance(1);
54 if let Some('\n') = state.peek() {
55 state.advance(1);
56 }
57 state.add_token(LeanSyntaxKind::Whitespace, start_pos, state.get_position());
58 true
59 }
60 else {
61 false
62 }
63 }
64
65 fn lex_comment<S: Source>(&self, state: &mut State<S>) -> bool {
67 let start_pos = state.get_position();
68
69 if let Some('-') = state.peek() {
70 state.advance(1);
71 if let Some('-') = state.peek() {
72 state.advance(1);
74 while let Some(ch) = state.peek() {
75 if ch == '\n' || ch == '\r' {
76 break;
77 }
78 state.advance(ch.len_utf8());
79 }
80 state.add_token(LeanSyntaxKind::Comment, start_pos, state.get_position());
81 true
82 }
83 else {
84 state.set_position(start_pos);
86 false
87 }
88 }
89 else if let Some('/') = state.peek() {
90 state.advance(1);
91 if let Some('-') = state.peek() {
92 state.advance(1);
94 let mut depth = 1;
95 while depth > 0 && state.not_at_end() {
96 if let Some('/') = state.peek() {
97 state.advance(1);
98 if let Some('-') = state.peek() {
99 state.advance(1);
100 depth += 1;
101 }
102 }
103 else if let Some('-') = state.peek() {
104 state.advance(1);
105 if let Some('/') = state.peek() {
106 state.advance(1);
107 depth -= 1;
108 }
109 }
110 else {
111 let ch = state.peek().unwrap();
112 state.advance(ch.len_utf8());
113 }
114 }
115 state.add_token(LeanSyntaxKind::Comment, start_pos, state.get_position());
116 true
117 }
118 else {
119 state.set_position(start_pos);
121 false
122 }
123 }
124 else {
125 false
126 }
127 }
128
129 fn lex_string<S: Source>(&self, state: &mut State<S>) -> bool {
131 let start_pos = state.get_position();
132
133 if let Some('"') = state.peek() {
134 state.advance(1);
135
136 while let Some(ch) = state.peek() {
137 if ch == '"' {
138 state.advance(1);
139 break;
140 }
141 else if ch == '\\' {
142 state.advance(1);
143 if let Some(escaped) = state.peek() {
144 state.advance(escaped.len_utf8());
145 }
146 }
147 else if ch == '\n' || ch == '\r' {
148 break; }
150 else {
151 state.advance(ch.len_utf8());
152 }
153 }
154 state.add_token(LeanSyntaxKind::StringLiteral, start_pos, state.get_position());
155 true
156 }
157 else {
158 false
159 }
160 }
161
162 fn lex_char<S: Source>(&self, state: &mut State<S>) -> bool {
164 let start_pos = state.get_position();
165
166 if let Some('\'') = state.peek() {
167 state.advance(1);
168
169 while let Some(ch) = state.peek() {
170 if ch == '\'' {
171 state.advance(1);
172 break;
173 }
174 else if ch == '\\' {
175 state.advance(1);
176 if let Some(escaped) = state.peek() {
177 state.advance(escaped.len_utf8());
178 }
179 }
180 else if ch == '\n' || ch == '\r' {
181 break;
182 }
183 else {
184 state.advance(ch.len_utf8());
185 }
186 }
187 state.add_token(LeanSyntaxKind::CharLiteral, start_pos, state.get_position());
188 true
189 }
190 else {
191 false
192 }
193 }
194
195 fn lex_number<S: Source>(&self, state: &mut State<S>) -> bool {
197 let start_pos = state.get_position();
198
199 if let Some(ch) = state.peek() {
200 if ch.is_ascii_digit() {
201 while let Some(ch) = state.peek() {
203 if ch.is_ascii_digit() {
204 state.advance(ch.len_utf8());
205 }
206 else {
207 break;
208 }
209 }
210
211 if let Some('.') = state.peek() {
213 state.advance(1); if let Some(next_char) = state.peek() {
215 if next_char.is_ascii_digit() {
216 while let Some(ch) = state.peek() {
217 if ch.is_ascii_digit() {
218 state.advance(ch.len_utf8());
219 }
220 else {
221 break;
222 }
223 }
224 }
225 }
226 }
227
228 if let Some(ch) = state.peek() {
230 if ch == 'e' || ch == 'E' {
231 state.advance(1);
232 if let Some(sign) = state.peek() {
233 if sign == '+' || sign == '-' {
234 state.advance(1);
235 }
236 }
237 while let Some(ch) = state.peek() {
238 if ch.is_ascii_digit() {
239 state.advance(ch.len_utf8());
240 }
241 else {
242 break;
243 }
244 }
245 }
246 }
247
248 state.add_token(LeanSyntaxKind::IntegerLiteral, start_pos, state.get_position());
249 true
250 }
251 else {
252 false
253 }
254 }
255 else {
256 false
257 }
258 }
259
260 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
262 let start_pos = state.get_position();
263
264 if let Some(ch) = state.peek() {
265 if ch.is_alphabetic() || ch == '_' {
266 state.advance(ch.len_utf8());
267
268 while let Some(ch) = state.peek() {
269 if ch.is_alphanumeric() || ch == '_' || ch == '\'' {
270 state.advance(ch.len_utf8());
271 }
272 else {
273 break;
274 }
275 }
276
277 let text = state.get_text_in((start_pos..state.get_position()).into());
278 let kind = match text {
279 "axiom" => LeanSyntaxKind::Axiom,
280 "constant" => LeanSyntaxKind::Constant,
281 "def" => LeanSyntaxKind::Def,
282 "example" => LeanSyntaxKind::Example,
283 "inductive" => LeanSyntaxKind::Inductive,
284 "lemma" => LeanSyntaxKind::Lemma,
285 "namespace" => LeanSyntaxKind::Namespace,
286 "open" => LeanSyntaxKind::Open,
287 "private" => LeanSyntaxKind::Private,
288 "protected" => LeanSyntaxKind::Protected,
289 "section" => LeanSyntaxKind::Section,
290 "structure" => LeanSyntaxKind::Structure,
291 "theorem" => LeanSyntaxKind::Theorem,
292 "universe" => LeanSyntaxKind::Universe,
293 "variable" => LeanSyntaxKind::Variable,
294 "variables" => LeanSyntaxKind::Variables,
295 "end" => LeanSyntaxKind::End,
296 "import" => LeanSyntaxKind::Import,
297 "export" => LeanSyntaxKind::Export,
298 "prelude" => LeanSyntaxKind::Prelude,
299 "noncomputable" => LeanSyntaxKind::Noncomputable,
300 "partial" => LeanSyntaxKind::Partial,
301 "unsafe" => LeanSyntaxKind::Unsafe,
302 "mutual" => LeanSyntaxKind::Mutual,
303 "where" => LeanSyntaxKind::Where,
304 "have" => LeanSyntaxKind::Have,
305 "show" => LeanSyntaxKind::Show,
306 "suffices" => LeanSyntaxKind::Suffices,
307 "let" => LeanSyntaxKind::Let,
308 "in" => LeanSyntaxKind::In,
309 "if" => LeanSyntaxKind::If,
310 "then" => LeanSyntaxKind::Then,
311 "else" => LeanSyntaxKind::Else,
312 "match" => LeanSyntaxKind::Match,
313 "with" => LeanSyntaxKind::With,
314 "fun" => LeanSyntaxKind::Fun,
315 "do" => LeanSyntaxKind::Do,
316 "for" => LeanSyntaxKind::For,
317 "while" => LeanSyntaxKind::While,
318 "break" => LeanSyntaxKind::Break,
319 "continue" => LeanSyntaxKind::Continue,
320 "return" => LeanSyntaxKind::Return,
321 "try" => LeanSyntaxKind::Try,
322 "catch" => LeanSyntaxKind::Catch,
323 "finally" => LeanSyntaxKind::Finally,
324 "throw" => LeanSyntaxKind::Throw,
325 _ => LeanSyntaxKind::Identifier,
326 };
327
328 state.add_token(kind, start_pos, state.get_position());
329 true
330 }
331 else {
332 false
333 }
334 }
335 else {
336 false
337 }
338 }
339
340 fn lex_operator_or_delimiter<S: Source>(&self, state: &mut State<S>) -> bool {
342 let start_pos = state.get_position();
343
344 if let Some(ch) = state.peek() {
345 let kind = match ch {
346 '(' => LeanSyntaxKind::LeftParen,
347 ')' => LeanSyntaxKind::RightParen,
348 '{' => LeanSyntaxKind::LeftBrace,
349 '}' => LeanSyntaxKind::RightBrace,
350 '[' => LeanSyntaxKind::LeftBracket,
351 ']' => LeanSyntaxKind::RightBracket,
352 ',' => LeanSyntaxKind::Comma,
353 ';' => LeanSyntaxKind::Semicolon,
354 '+' => LeanSyntaxKind::Plus,
355 '*' => LeanSyntaxKind::Star,
356 '/' => LeanSyntaxKind::Slash,
357 '%' => LeanSyntaxKind::Percent,
358 '^' => LeanSyntaxKind::Caret,
359 '#' => LeanSyntaxKind::Hash,
360 '&' => LeanSyntaxKind::Ampersand,
361 '|' => LeanSyntaxKind::Pipe,
362 '~' => LeanSyntaxKind::Tilde,
363 '!' => LeanSyntaxKind::Bang,
364 '?' => LeanSyntaxKind::Question,
365 '@' => LeanSyntaxKind::At,
366 '$' => LeanSyntaxKind::Dollar,
367 '<' => LeanSyntaxKind::Lt,
368 '>' => LeanSyntaxKind::Gt,
369 '=' => LeanSyntaxKind::Eq,
370 _ => {
371 match ch {
373 ':' => {
374 state.advance(1);
375 if let Some('=') = state.peek() {
376 state.advance(1);
377 state.add_token(LeanSyntaxKind::ColonEq, start_pos, state.get_position());
378 return true;
379 }
380 else if let Some(':') = state.peek() {
381 state.advance(1);
382 state.add_token(LeanSyntaxKind::ColonColon, start_pos, state.get_position());
383 return true;
384 }
385 else {
386 state.add_token(LeanSyntaxKind::Colon, start_pos, state.get_position());
387 return true;
388 }
389 }
390 '.' => {
391 state.advance(1);
392 if let Some('.') = state.peek() {
393 state.advance(1);
394 state.add_token(LeanSyntaxKind::DotDot, start_pos, state.get_position());
395 return true;
396 }
397 else {
398 state.add_token(LeanSyntaxKind::Dot, start_pos, state.get_position());
399 return true;
400 }
401 }
402 '-' => {
403 state.advance(1);
404 if let Some('>') = state.peek() {
405 state.advance(1);
406 state.add_token(LeanSyntaxKind::Arrow, start_pos, state.get_position());
407 return true;
408 }
409 else {
410 state.add_token(LeanSyntaxKind::Minus, start_pos, state.get_position());
411 return true;
412 }
413 }
414 _ => return false,
415 }
416 }
417 };
418
419 state.advance(ch.len_utf8());
420 state.add_token(kind, start_pos, state.get_position());
421 true
422 }
423 else {
424 false
425 }
426 }
427}
428
429impl<'config> Lexer<LeanLanguage> for LeanLexer<'config> {
430 fn lex_incremental(
431 &self,
432 source: impl Source,
433 changed: usize,
434 cache: IncrementalCache<LeanLanguage>,
435 ) -> LexOutput<LeanLanguage> {
436 let mut state = LexerState::new_with_cache(source, changed, cache);
437 let result = self.run(&mut state);
438 state.finish(result)
439 }
440
441 fn lex(&self, source: impl Source) -> LexOutput<LeanLanguage> {
442 let mut state = State::new(source);
443 let result = self.run(&mut state);
444 state.finish(result)
445 }
446}
447
448impl<'config> LeanLexer<'config> {
449 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
450 while state.not_at_end() {
451 if self.skip_whitespace(state) {
452 continue;
453 }
454
455 if self.lex_newline(state) {
456 continue;
457 }
458
459 if self.lex_comment(state) {
460 continue;
461 }
462
463 if self.lex_string(state) {
464 continue;
465 }
466
467 if self.lex_char(state) {
468 continue;
469 }
470
471 if self.lex_number(state) {
472 continue;
473 }
474
475 if self.lex_identifier_or_keyword(state) {
476 continue;
477 }
478
479 if self.lex_operator_or_delimiter(state) {
480 continue;
481 }
482
483 let start_pos = state.get_position();
485 if let Some(ch) = state.peek() {
486 state.advance(ch.len_utf8());
487 state.add_token(LeanSyntaxKind::Error, start_pos, state.get_position());
488 }
489 }
490
491 let eof_pos = state.get_position();
493 state.add_token(LeanSyntaxKind::Eof, eof_pos, eof_pos);
494
495 Ok(())
496 }
497}