1use crate::{kind::LeanSyntaxKind, language::LeanLanguage};
6use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
7
/// Shorthand for the lexer state specialised to the Lean language.
type State<'a, S> = LexerState<'a, S, LeanLanguage>;

/// Tokenizer for Lean source text.
///
/// The lexer itself is stateless (a unit struct, hence `Copy`); all per-run
/// state lives in [`LexerState`].
#[derive(Debug, Clone, Copy)]
pub struct LeanLexer;
13
14impl Lexer<LeanLanguage> for LeanLexer {
15 fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<LeanLanguage>) -> LexOutput<LeanLanguage> {
16 let mut state = State::new(text);
17 let result = self.run(&mut state);
18 if result.is_ok() {
19 state.add_eof();
20 }
21 state.finish_with_cache(result, cache)
22 }
23}
24
impl LeanLexer {
    /// Creates a lexer. The language config carries no lexer-relevant
    /// options, so it is accepted for API symmetry and ignored.
    pub fn new(_config: &LeanLanguage) -> Self {
        Self
    }
31 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
33 let start_pos = state.get_position();
34
35 while let Some(ch) = state.peek() {
36 if ch == ' ' || ch == '\t' {
37 state.advance(ch.len_utf8());
38 }
39 else {
40 break;
41 }
42 }
43
44 if state.get_position() > start_pos {
45 state.add_token(LeanSyntaxKind::Whitespace, start_pos, state.get_position());
46 true
47 }
48 else {
49 false
50 }
51 }
52
53 fn lex_newline<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
55 let start_pos = state.get_position();
56
57 if let Some('\n') = state.peek() {
58 state.advance(1);
59 state.add_token(LeanSyntaxKind::Newline, start_pos, state.get_position());
60 true
61 }
62 else if let Some('\r') = state.peek() {
63 state.advance(1);
64 if let Some('\n') = state.peek() {
65 state.advance(1);
66 }
67 state.add_token(LeanSyntaxKind::Newline, start_pos, state.get_position());
68 true
69 }
70 else {
71 false
72 }
73 }
74
    /// Lexes either a line comment (`-- …`, up to but not including the line
    /// break) or a block comment (`/- … -/`, which nests in Lean).
    /// On a lone `-` or `/` the cursor is rewound and `false` is returned so
    /// the operator lexer can claim the character.
    fn lex_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
        let start_pos = state.get_position();

        if let Some('-') = state.peek() {
            state.advance(1);
            if let Some('-') = state.peek() {
                state.advance(1);
                // Line comment: consume everything up to the newline.
                while let Some(ch) = state.peek() {
                    if ch == '\n' || ch == '\r' {
                        break;
                    }
                    state.advance(ch.len_utf8());
                }
                state.add_token(LeanSyntaxKind::Comment, start_pos, state.get_position());
                true
            }
            else {
                // Lone '-' is the Minus/Arrow starter, not a comment: rewind.
                state.set_position(start_pos);
                false
            }
        }
        else if let Some('/') = state.peek() {
            state.advance(1);
            if let Some('-') = state.peek() {
                state.advance(1);
                // Block comment: Lean block comments nest, so track depth.
                let mut depth = 1;
                while depth > 0 && state.not_at_end() {
                    if let Some('/') = state.peek() {
                        state.advance(1);
                        // "/-" opens a nested comment.
                        if let Some('-') = state.peek() {
                            state.advance(1);
                            depth += 1;
                        }
                    }
                    else if let Some('-') = state.peek() {
                        state.advance(1);
                        // "-/" closes the innermost open comment.
                        if let Some('/') = state.peek() {
                            state.advance(1);
                            depth -= 1;
                        }
                    }
                    else if let Some(ch) = state.peek() {
                        state.advance(ch.len_utf8());
                    }
                    else {
                        break;
                    }
                }
                // NOTE(review): an unterminated block comment (EOF before the
                // closing "-/") is still emitted as a Comment token rather
                // than an error — confirm this leniency is intended.
                state.add_token(LeanSyntaxKind::Comment, start_pos, state.get_position());
                true
            }
            else {
                // Lone '/' is the Slash operator: rewind.
                state.set_position(start_pos);
                false
            }
        }
        else {
            false
        }
    }
140
141 fn lex_string<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
143 let start_pos = state.get_position();
144
145 if let Some('"') = state.peek() {
146 state.advance(1);
147
148 while let Some(ch) = state.peek() {
149 if ch == '"' {
150 state.advance(1);
151 break;
152 }
153 else if ch == '\\' {
154 state.advance(1);
155 if let Some(escaped) = state.peek() {
156 state.advance(escaped.len_utf8());
157 }
158 }
159 else if ch == '\n' || ch == '\r' {
160 break; }
162 else {
163 state.advance(ch.len_utf8());
164 }
165 }
166 state.add_token(LeanSyntaxKind::StringLiteral, start_pos, state.get_position());
167 true
168 }
169 else {
170 false
171 }
172 }
173
174 fn lex_char<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
176 let start_pos = state.get_position();
177
178 if let Some('\'') = state.peek() {
179 state.advance(1);
180
181 while let Some(ch) = state.peek() {
182 if ch == '\'' {
183 state.advance(1);
184 break;
185 }
186 else if ch == '\\' {
187 state.advance(1);
188 if let Some(escaped) = state.peek() {
189 state.advance(escaped.len_utf8());
190 }
191 }
192 else if ch == '\n' || ch == '\r' {
193 break;
194 }
195 else {
196 state.advance(ch.len_utf8());
197 }
198 }
199 state.add_token(LeanSyntaxKind::CharLiteral, start_pos, state.get_position());
200 true
201 }
202 else {
203 false
204 }
205 }
206
207 fn lex_number<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
209 let start_pos = state.get_position();
210
211 if let Some(ch) = state.peek() {
212 if ch.is_ascii_digit() {
213 while let Some(ch) = state.peek() {
215 if ch.is_ascii_digit() {
216 state.advance(ch.len_utf8());
217 }
218 else {
219 break;
220 }
221 }
222
223 if let Some('.') = state.peek() {
225 state.advance(1); if let Some(next_char) = state.peek() {
227 if next_char.is_ascii_digit() {
228 while let Some(ch) = state.peek() {
229 if ch.is_ascii_digit() {
230 state.advance(ch.len_utf8());
231 }
232 else {
233 break;
234 }
235 }
236 }
237 }
238 }
239
240 if let Some(ch) = state.peek() {
242 if ch == 'e' || ch == 'E' {
243 state.advance(1);
244 if let Some(sign) = state.peek() {
245 if sign == '+' || sign == '-' {
246 state.advance(1);
247 }
248 }
249 while let Some(ch) = state.peek() {
250 if ch.is_ascii_digit() {
251 state.advance(ch.len_utf8());
252 }
253 else {
254 break;
255 }
256 }
257 }
258 }
259
260 state.add_token(LeanSyntaxKind::IntegerLiteral, start_pos, state.get_position());
261 true
262 }
263 else {
264 false
265 }
266 }
267 else {
268 false
269 }
270 }
271
    /// Lexes an identifier — an alphabetic character or `_` followed by any
    /// run of alphanumerics, `_`, or `'` — and then classifies it as a
    /// keyword token or a plain `Identifier` by exact-text comparison.
    fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            if ch.is_alphabetic() || ch == '_' {
                state.advance(ch.len_utf8());

                // Trailing primes (e.g. `h'`) are part of Lean identifiers.
                while let Some(ch) = state.peek() {
                    if ch.is_alphanumeric() || ch == '_' || ch == '\'' {
                        state.advance(ch.len_utf8());
                    }
                    else {
                        break;
                    }
                }

                // Map reserved words onto their dedicated token kinds;
                // anything else is a plain identifier.
                let text = state.get_text_in(oak_core::Range { start: start_pos, end: state.get_position() });
                let kind = match text.as_ref() {
                    "axiom" => LeanSyntaxKind::Axiom,
                    "constant" => LeanSyntaxKind::Constant,
                    "def" => LeanSyntaxKind::Def,
                    "example" => LeanSyntaxKind::Example,
                    "inductive" => LeanSyntaxKind::Inductive,
                    "lemma" => LeanSyntaxKind::Lemma,
                    "namespace" => LeanSyntaxKind::Namespace,
                    "open" => LeanSyntaxKind::Open,
                    "private" => LeanSyntaxKind::Private,
                    "protected" => LeanSyntaxKind::Protected,
                    "section" => LeanSyntaxKind::Section,
                    "structure" => LeanSyntaxKind::Structure,
                    "theorem" => LeanSyntaxKind::Theorem,
                    "universe" => LeanSyntaxKind::Universe,
                    "variable" => LeanSyntaxKind::Variable,
                    "variables" => LeanSyntaxKind::Variables,
                    "end" => LeanSyntaxKind::End,
                    "import" => LeanSyntaxKind::Import,
                    "export" => LeanSyntaxKind::Export,
                    "prelude" => LeanSyntaxKind::Prelude,
                    "noncomputable" => LeanSyntaxKind::Noncomputable,
                    "partial" => LeanSyntaxKind::Partial,
                    "unsafe" => LeanSyntaxKind::Unsafe,
                    "mutual" => LeanSyntaxKind::Mutual,
                    "where" => LeanSyntaxKind::Where,
                    "have" => LeanSyntaxKind::Have,
                    "show" => LeanSyntaxKind::Show,
                    "suffices" => LeanSyntaxKind::Suffices,
                    "let" => LeanSyntaxKind::Let,
                    "in" => LeanSyntaxKind::In,
                    "if" => LeanSyntaxKind::If,
                    "then" => LeanSyntaxKind::Then,
                    "else" => LeanSyntaxKind::Else,
                    "match" => LeanSyntaxKind::Match,
                    "with" => LeanSyntaxKind::With,
                    "fun" => LeanSyntaxKind::Fun,
                    "do" => LeanSyntaxKind::Do,
                    "for" => LeanSyntaxKind::For,
                    "while" => LeanSyntaxKind::While,
                    "break" => LeanSyntaxKind::Break,
                    "continue" => LeanSyntaxKind::Continue,
                    "return" => LeanSyntaxKind::Return,
                    "try" => LeanSyntaxKind::Try,
                    "catch" => LeanSyntaxKind::Catch,
                    "finally" => LeanSyntaxKind::Finally,
                    "throw" => LeanSyntaxKind::Throw,
                    _ => LeanSyntaxKind::Identifier,
                };

                state.add_token(kind, start_pos, state.get_position());
                true
            }
            else {
                false
            }
        }
        else {
            false
        }
    }
351
352 fn lex_operator_or_delimiter<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
354 let start_pos = state.get_position();
355
356 if let Some(ch) = state.peek() {
357 let kind = match ch {
358 '(' => LeanSyntaxKind::LeftParen,
359 ')' => LeanSyntaxKind::RightParen,
360 '{' => LeanSyntaxKind::LeftBrace,
361 '}' => LeanSyntaxKind::RightBrace,
362 '[' => LeanSyntaxKind::LeftBracket,
363 ']' => LeanSyntaxKind::RightBracket,
364 ',' => LeanSyntaxKind::Comma,
365 ';' => LeanSyntaxKind::Semicolon,
366 '+' => LeanSyntaxKind::Plus,
367 '*' => LeanSyntaxKind::Star,
368 '/' => LeanSyntaxKind::Slash,
369 '%' => LeanSyntaxKind::Percent,
370 '^' => LeanSyntaxKind::Caret,
371 '#' => LeanSyntaxKind::Hash,
372 '&' => LeanSyntaxKind::Ampersand,
373 '|' => LeanSyntaxKind::Pipe,
374 '~' => LeanSyntaxKind::Tilde,
375 '!' => LeanSyntaxKind::Bang,
376 '?' => LeanSyntaxKind::Question,
377 '@' => LeanSyntaxKind::At,
378 '$' => LeanSyntaxKind::Dollar,
379 '<' => LeanSyntaxKind::Lt,
380 '>' => LeanSyntaxKind::Gt,
381 '=' => LeanSyntaxKind::Eq,
382 _ => {
383 match ch {
385 ':' => {
386 state.advance(1);
387 if let Some('=') = state.peek() {
388 state.advance(1);
389 state.add_token(LeanSyntaxKind::ColonEq, start_pos, state.get_position());
390 return true;
391 }
392 else if let Some(':') = state.peek() {
393 state.advance(1);
394 state.add_token(LeanSyntaxKind::ColonColon, start_pos, state.get_position());
395 return true;
396 }
397 else {
398 state.add_token(LeanSyntaxKind::Colon, start_pos, state.get_position());
399 return true;
400 }
401 }
402 '.' => {
403 state.advance(1);
404 if let Some('.') = state.peek() {
405 state.advance(1);
406 state.add_token(LeanSyntaxKind::DotDot, start_pos, state.get_position());
407 return true;
408 }
409 else {
410 state.add_token(LeanSyntaxKind::Dot, start_pos, state.get_position());
411 return true;
412 }
413 }
414 '-' => {
415 state.advance(1);
416 if let Some('>') = state.peek() {
417 state.advance(1);
418 state.add_token(LeanSyntaxKind::Arrow, start_pos, state.get_position());
419 return true;
420 }
421 else {
422 state.add_token(LeanSyntaxKind::Minus, start_pos, state.get_position());
423 return true;
424 }
425 }
426 _ => return false,
427 }
428 }
429 };
430
431 state.advance(ch.len_utf8());
432 state.add_token(kind, start_pos, state.get_position());
433 true
434 }
435 else {
436 false
437 }
438 }
439
440 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
441 while state.not_at_end() {
442 let safe_point = state.get_position();
443 if self.skip_whitespace(state) {
444 continue;
445 }
446
447 if self.lex_newline(state) {
448 continue;
449 }
450
451 if self.lex_comment(state) {
452 continue;
453 }
454
455 if self.lex_string(state) {
456 continue;
457 }
458
459 if self.lex_char(state) {
460 continue;
461 }
462
463 if self.lex_number(state) {
464 continue;
465 }
466
467 if self.lex_identifier_or_keyword(state) {
468 continue;
469 }
470
471 if self.lex_operator_or_delimiter(state) {
472 continue;
473 }
474
475 let start_pos = state.get_position();
477 if let Some(ch) = state.peek() {
478 state.advance(ch.len_utf8());
479 state.add_token(LeanSyntaxKind::Error, start_pos, state.get_position());
480 }
481
482 state.advance_if_dead_lock(safe_point);
483 }
484
485 Ok(())
486 }
487}