1use crate::{kind::LeanSyntaxKind, language::LeanLanguage};
6use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
7
8type State<'a, S> = LexerState<'a, S, LeanLanguage>;
9
10#[derive(Debug, Clone)]
12pub struct LeanLexer<'config> {
13 _config: &'config LeanLanguage,
14}
15
16impl<'config> Lexer<LeanLanguage> for LeanLexer<'config> {
17 fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<LeanLanguage>) -> LexOutput<LeanLanguage> {
18 let mut state = State::new(text);
19 let result = self.run(&mut state);
20 if result.is_ok() {
21 state.add_eof();
22 }
23 state.finish_with_cache(result, cache)
24 }
25}
26
27impl<'config> LeanLexer<'config> {
28 pub fn new(config: &'config LeanLanguage) -> Self {
30 Self { _config: config }
31 }
32
33 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
35 let start_pos = state.get_position();
36
37 while let Some(ch) = state.peek() {
38 if ch == ' ' || ch == '\t' {
39 state.advance(ch.len_utf8());
40 }
41 else {
42 break;
43 }
44 }
45
46 if state.get_position() > start_pos {
47 state.add_token(LeanSyntaxKind::Whitespace, start_pos, state.get_position());
48 true
49 }
50 else {
51 false
52 }
53 }
54
55 fn lex_newline<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
57 let start_pos = state.get_position();
58
59 if let Some('\n') = state.peek() {
60 state.advance(1);
61 state.add_token(LeanSyntaxKind::Newline, start_pos, state.get_position());
62 true
63 }
64 else if let Some('\r') = state.peek() {
65 state.advance(1);
66 if let Some('\n') = state.peek() {
67 state.advance(1);
68 }
69 state.add_token(LeanSyntaxKind::Newline, start_pos, state.get_position());
70 true
71 }
72 else {
73 false
74 }
75 }
76
77 fn lex_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
79 let start_pos = state.get_position();
80
81 if let Some('-') = state.peek() {
82 state.advance(1);
83 if let Some('-') = state.peek() {
84 state.advance(1);
86 while let Some(ch) = state.peek() {
87 if ch == '\n' || ch == '\r' {
88 break;
89 }
90 state.advance(ch.len_utf8());
91 }
92 state.add_token(LeanSyntaxKind::Comment, start_pos, state.get_position());
93 true
94 }
95 else {
96 state.set_position(start_pos);
98 false
99 }
100 }
101 else if let Some('/') = state.peek() {
102 state.advance(1);
103 if let Some('-') = state.peek() {
104 state.advance(1);
106 let mut depth = 1;
107 while depth > 0 && state.not_at_end() {
108 if let Some('/') = state.peek() {
109 state.advance(1);
110 if let Some('-') = state.peek() {
111 state.advance(1);
112 depth += 1;
113 }
114 }
115 else if let Some('-') = state.peek() {
116 state.advance(1);
117 if let Some('/') = state.peek() {
118 state.advance(1);
119 depth -= 1;
120 }
121 }
122 else if let Some(ch) = state.peek() {
123 state.advance(ch.len_utf8());
124 }
125 else {
126 break;
127 }
128 }
129 state.add_token(LeanSyntaxKind::Comment, start_pos, state.get_position());
130 true
131 }
132 else {
133 state.set_position(start_pos);
135 false
136 }
137 }
138 else {
139 false
140 }
141 }
142
143 fn lex_string<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
145 let start_pos = state.get_position();
146
147 if let Some('"') = state.peek() {
148 state.advance(1);
149
150 while let Some(ch) = state.peek() {
151 if ch == '"' {
152 state.advance(1);
153 break;
154 }
155 else if ch == '\\' {
156 state.advance(1);
157 if let Some(escaped) = state.peek() {
158 state.advance(escaped.len_utf8());
159 }
160 }
161 else if ch == '\n' || ch == '\r' {
162 break; }
164 else {
165 state.advance(ch.len_utf8());
166 }
167 }
168 state.add_token(LeanSyntaxKind::StringLiteral, start_pos, state.get_position());
169 true
170 }
171 else {
172 false
173 }
174 }
175
176 fn lex_char<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
178 let start_pos = state.get_position();
179
180 if let Some('\'') = state.peek() {
181 state.advance(1);
182
183 while let Some(ch) = state.peek() {
184 if ch == '\'' {
185 state.advance(1);
186 break;
187 }
188 else if ch == '\\' {
189 state.advance(1);
190 if let Some(escaped) = state.peek() {
191 state.advance(escaped.len_utf8());
192 }
193 }
194 else if ch == '\n' || ch == '\r' {
195 break;
196 }
197 else {
198 state.advance(ch.len_utf8());
199 }
200 }
201 state.add_token(LeanSyntaxKind::CharLiteral, start_pos, state.get_position());
202 true
203 }
204 else {
205 false
206 }
207 }
208
209 fn lex_number<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
211 let start_pos = state.get_position();
212
213 if let Some(ch) = state.peek() {
214 if ch.is_ascii_digit() {
215 while let Some(ch) = state.peek() {
217 if ch.is_ascii_digit() {
218 state.advance(ch.len_utf8());
219 }
220 else {
221 break;
222 }
223 }
224
225 if let Some('.') = state.peek() {
227 state.advance(1); if let Some(next_char) = state.peek() {
229 if next_char.is_ascii_digit() {
230 while let Some(ch) = state.peek() {
231 if ch.is_ascii_digit() {
232 state.advance(ch.len_utf8());
233 }
234 else {
235 break;
236 }
237 }
238 }
239 }
240 }
241
242 if let Some(ch) = state.peek() {
244 if ch == 'e' || ch == 'E' {
245 state.advance(1);
246 if let Some(sign) = state.peek() {
247 if sign == '+' || sign == '-' {
248 state.advance(1);
249 }
250 }
251 while let Some(ch) = state.peek() {
252 if ch.is_ascii_digit() {
253 state.advance(ch.len_utf8());
254 }
255 else {
256 break;
257 }
258 }
259 }
260 }
261
262 state.add_token(LeanSyntaxKind::IntegerLiteral, start_pos, state.get_position());
263 true
264 }
265 else {
266 false
267 }
268 }
269 else {
270 false
271 }
272 }
273
274 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
276 let start_pos = state.get_position();
277
278 if let Some(ch) = state.peek() {
279 if ch.is_alphabetic() || ch == '_' {
280 state.advance(ch.len_utf8());
281
282 while let Some(ch) = state.peek() {
283 if ch.is_alphanumeric() || ch == '_' || ch == '\'' {
284 state.advance(ch.len_utf8());
285 }
286 else {
287 break;
288 }
289 }
290
291 let text = state.get_text_in(oak_core::Range { start: start_pos, end: state.get_position() });
292 let kind = match text.as_ref() {
293 "axiom" => LeanSyntaxKind::Axiom,
294 "constant" => LeanSyntaxKind::Constant,
295 "def" => LeanSyntaxKind::Def,
296 "example" => LeanSyntaxKind::Example,
297 "inductive" => LeanSyntaxKind::Inductive,
298 "lemma" => LeanSyntaxKind::Lemma,
299 "namespace" => LeanSyntaxKind::Namespace,
300 "open" => LeanSyntaxKind::Open,
301 "private" => LeanSyntaxKind::Private,
302 "protected" => LeanSyntaxKind::Protected,
303 "section" => LeanSyntaxKind::Section,
304 "structure" => LeanSyntaxKind::Structure,
305 "theorem" => LeanSyntaxKind::Theorem,
306 "universe" => LeanSyntaxKind::Universe,
307 "variable" => LeanSyntaxKind::Variable,
308 "variables" => LeanSyntaxKind::Variables,
309 "end" => LeanSyntaxKind::End,
310 "import" => LeanSyntaxKind::Import,
311 "export" => LeanSyntaxKind::Export,
312 "prelude" => LeanSyntaxKind::Prelude,
313 "noncomputable" => LeanSyntaxKind::Noncomputable,
314 "partial" => LeanSyntaxKind::Partial,
315 "unsafe" => LeanSyntaxKind::Unsafe,
316 "mutual" => LeanSyntaxKind::Mutual,
317 "where" => LeanSyntaxKind::Where,
318 "have" => LeanSyntaxKind::Have,
319 "show" => LeanSyntaxKind::Show,
320 "suffices" => LeanSyntaxKind::Suffices,
321 "let" => LeanSyntaxKind::Let,
322 "in" => LeanSyntaxKind::In,
323 "if" => LeanSyntaxKind::If,
324 "then" => LeanSyntaxKind::Then,
325 "else" => LeanSyntaxKind::Else,
326 "match" => LeanSyntaxKind::Match,
327 "with" => LeanSyntaxKind::With,
328 "fun" => LeanSyntaxKind::Fun,
329 "do" => LeanSyntaxKind::Do,
330 "for" => LeanSyntaxKind::For,
331 "while" => LeanSyntaxKind::While,
332 "break" => LeanSyntaxKind::Break,
333 "continue" => LeanSyntaxKind::Continue,
334 "return" => LeanSyntaxKind::Return,
335 "try" => LeanSyntaxKind::Try,
336 "catch" => LeanSyntaxKind::Catch,
337 "finally" => LeanSyntaxKind::Finally,
338 "throw" => LeanSyntaxKind::Throw,
339 _ => LeanSyntaxKind::Identifier,
340 };
341
342 state.add_token(kind, start_pos, state.get_position());
343 true
344 }
345 else {
346 false
347 }
348 }
349 else {
350 false
351 }
352 }
353
354 fn lex_operator_or_delimiter<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
356 let start_pos = state.get_position();
357
358 if let Some(ch) = state.peek() {
359 let kind = match ch {
360 '(' => LeanSyntaxKind::LeftParen,
361 ')' => LeanSyntaxKind::RightParen,
362 '{' => LeanSyntaxKind::LeftBrace,
363 '}' => LeanSyntaxKind::RightBrace,
364 '[' => LeanSyntaxKind::LeftBracket,
365 ']' => LeanSyntaxKind::RightBracket,
366 ',' => LeanSyntaxKind::Comma,
367 ';' => LeanSyntaxKind::Semicolon,
368 '+' => LeanSyntaxKind::Plus,
369 '*' => LeanSyntaxKind::Star,
370 '/' => LeanSyntaxKind::Slash,
371 '%' => LeanSyntaxKind::Percent,
372 '^' => LeanSyntaxKind::Caret,
373 '#' => LeanSyntaxKind::Hash,
374 '&' => LeanSyntaxKind::Ampersand,
375 '|' => LeanSyntaxKind::Pipe,
376 '~' => LeanSyntaxKind::Tilde,
377 '!' => LeanSyntaxKind::Bang,
378 '?' => LeanSyntaxKind::Question,
379 '@' => LeanSyntaxKind::At,
380 '$' => LeanSyntaxKind::Dollar,
381 '<' => LeanSyntaxKind::Lt,
382 '>' => LeanSyntaxKind::Gt,
383 '=' => LeanSyntaxKind::Eq,
384 _ => {
385 match ch {
387 ':' => {
388 state.advance(1);
389 if let Some('=') = state.peek() {
390 state.advance(1);
391 state.add_token(LeanSyntaxKind::ColonEq, start_pos, state.get_position());
392 return true;
393 }
394 else if let Some(':') = state.peek() {
395 state.advance(1);
396 state.add_token(LeanSyntaxKind::ColonColon, start_pos, state.get_position());
397 return true;
398 }
399 else {
400 state.add_token(LeanSyntaxKind::Colon, start_pos, state.get_position());
401 return true;
402 }
403 }
404 '.' => {
405 state.advance(1);
406 if let Some('.') = state.peek() {
407 state.advance(1);
408 state.add_token(LeanSyntaxKind::DotDot, start_pos, state.get_position());
409 return true;
410 }
411 else {
412 state.add_token(LeanSyntaxKind::Dot, start_pos, state.get_position());
413 return true;
414 }
415 }
416 '-' => {
417 state.advance(1);
418 if let Some('>') = state.peek() {
419 state.advance(1);
420 state.add_token(LeanSyntaxKind::Arrow, start_pos, state.get_position());
421 return true;
422 }
423 else {
424 state.add_token(LeanSyntaxKind::Minus, start_pos, state.get_position());
425 return true;
426 }
427 }
428 _ => return false,
429 }
430 }
431 };
432
433 state.advance(ch.len_utf8());
434 state.add_token(kind, start_pos, state.get_position());
435 true
436 }
437 else {
438 false
439 }
440 }
441
442 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
443 while state.not_at_end() {
444 let safe_point = state.get_position();
445 if self.skip_whitespace(state) {
446 continue;
447 }
448
449 if self.lex_newline(state) {
450 continue;
451 }
452
453 if self.lex_comment(state) {
454 continue;
455 }
456
457 if self.lex_string(state) {
458 continue;
459 }
460
461 if self.lex_char(state) {
462 continue;
463 }
464
465 if self.lex_number(state) {
466 continue;
467 }
468
469 if self.lex_identifier_or_keyword(state) {
470 continue;
471 }
472
473 if self.lex_operator_or_delimiter(state) {
474 continue;
475 }
476
477 let start_pos = state.get_position();
479 if let Some(ch) = state.peek() {
480 state.advance(ch.len_utf8());
481 state.add_token(LeanSyntaxKind::Error, start_pos, state.get_position());
482 }
483
484 state.advance_if_dead_lock(safe_point);
485 }
486
487 Ok(())
488 }
489}