1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::LeanLanguage, lexer::token_type::LeanTokenType};
8use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
9
10type State<'a, S> = LexerState<'a, S, LeanLanguage>;
11
/// Hand-written lexer for Lean source text.
///
/// Holds only a borrow of the shared [`LeanLanguage`] configuration; all
/// lexing state lives in a per-run [`LexerState`], so a single `LeanLexer`
/// is cheap to clone and reusable across inputs.
#[derive(Debug, Clone)]
pub struct LeanLexer<'config> {
    // Currently unused by the scanning routines (hence the underscore), but
    // kept so future config-driven behavior doesn't change the constructor.
    _config: &'config LeanLanguage,
}
17
18impl<'config> Lexer<LeanLanguage> for LeanLexer<'config> {
19 fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<LeanLanguage>) -> LexOutput<LeanLanguage> {
20 let mut state = State::new(text);
21 let result = self.run(&mut state);
22 if result.is_ok() {
23 state.add_eof()
24 }
25 state.finish_with_cache(result, cache)
26 }
27}
28
29impl<'config> LeanLexer<'config> {
30 pub fn new(config: &'config LeanLanguage) -> Self {
32 Self { _config: config }
33 }
34
35 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
37 let start_pos = state.get_position();
38
39 while let Some(ch) = state.peek() {
40 if ch == ' ' || ch == '\t' { state.advance(ch.len_utf8()) } else { break }
41 }
42
43 if state.get_position() > start_pos {
44 state.add_token(LeanTokenType::Whitespace, start_pos, state.get_position());
45 true
46 }
47 else {
48 false
49 }
50 }
51
52 fn lex_newline<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
54 let start_pos = state.get_position();
55
56 if let Some('\n') = state.peek() {
57 state.advance(1);
58 state.add_token(LeanTokenType::Newline, start_pos, state.get_position());
59 true
60 }
61 else if let Some('\r') = state.peek() {
62 state.advance(1);
63 if let Some('\n') = state.peek() {
64 state.advance(1)
65 }
66 state.add_token(LeanTokenType::Newline, start_pos, state.get_position());
67 true
68 }
69 else {
70 false
71 }
72 }
73
74 fn lex_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
76 let start_pos = state.get_position();
77
78 if let Some('-') = state.peek() {
79 state.advance(1);
80 if let Some('-') = state.peek() {
81 state.advance(1);
83 while let Some(ch) = state.peek() {
84 if ch == '\n' || ch == '\r' {
85 break;
86 }
87 state.advance(ch.len_utf8())
88 }
89 state.add_token(LeanTokenType::Comment, start_pos, state.get_position());
90 true
91 }
92 else {
93 state.set_position(start_pos);
95 false
96 }
97 }
98 else if let Some('/') = state.peek() {
99 state.advance(1);
100 if let Some('-') = state.peek() {
101 state.advance(1);
103 let mut depth = 1;
104 while depth > 0 && state.not_at_end() {
105 if let Some('/') = state.peek() {
106 state.advance(1);
107 if let Some('-') = state.peek() {
108 state.advance(1);
109 depth += 1
110 }
111 }
112 else if let Some('-') = state.peek() {
113 state.advance(1);
114 if let Some('/') = state.peek() {
115 state.advance(1);
116 depth -= 1
117 }
118 }
119 else if let Some(ch) = state.peek() {
120 state.advance(ch.len_utf8())
121 }
122 else {
123 break;
124 }
125 }
126 state.add_token(LeanTokenType::Comment, start_pos, state.get_position());
127 true
128 }
129 else {
130 state.set_position(start_pos);
132 false
133 }
134 }
135 else {
136 false
137 }
138 }
139
140 fn lex_string<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
142 let start_pos = state.get_position();
143
144 if let Some('"') = state.peek() {
145 state.advance(1);
146
147 while let Some(ch) = state.peek() {
148 if ch == '"' {
149 state.advance(1);
150 break;
151 }
152 else if ch == '\\' {
153 state.advance(1);
154 if let Some(escaped) = state.peek() {
155 state.advance(escaped.len_utf8())
156 }
157 }
158 else if ch == '\n' || ch == '\r' {
159 break; }
161 else {
162 state.advance(ch.len_utf8())
163 }
164 }
165 state.add_token(LeanTokenType::StringLiteral, start_pos, state.get_position());
166 true
167 }
168 else {
169 false
170 }
171 }
172
173 fn lex_char<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
175 let start_pos = state.get_position();
176
177 if let Some('\'') = state.peek() {
178 state.advance(1);
179
180 while let Some(ch) = state.peek() {
181 if ch == '\'' {
182 state.advance(1);
183 break;
184 }
185 else if ch == '\\' {
186 state.advance(1);
187 if let Some(escaped) = state.peek() {
188 state.advance(escaped.len_utf8())
189 }
190 }
191 else if ch == '\n' || ch == '\r' {
192 break;
193 }
194 else {
195 state.advance(ch.len_utf8())
196 }
197 }
198 state.add_token(LeanTokenType::CharLiteral, start_pos, state.get_position());
199 true
200 }
201 else {
202 false
203 }
204 }
205
206 fn lex_number<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
208 let start_pos = state.get_position();
209
210 if let Some(ch) = state.peek() {
211 if ch.is_ascii_digit() {
212 while let Some(ch) = state.peek() {
214 if ch.is_ascii_digit() { state.advance(ch.len_utf8()) } else { break }
215 }
216
217 if let Some('.') = state.peek() {
219 state.advance(1); if let Some(next_char) = state.peek() {
221 if next_char.is_ascii_digit() {
222 while let Some(ch) = state.peek() {
223 if ch.is_ascii_digit() { state.advance(ch.len_utf8()) } else { break }
224 }
225 }
226 }
227 }
228
229 if let Some(ch) = state.peek() {
231 if ch == 'e' || ch == 'E' {
232 state.advance(1);
233 if let Some(sign) = state.peek() {
234 if sign == '+' || sign == '-' {
235 state.advance(1)
236 }
237 }
238 while let Some(ch) = state.peek() {
239 if ch.is_ascii_digit() { state.advance(ch.len_utf8()) } else { break }
240 }
241 }
242 }
243
244 state.add_token(LeanTokenType::IntegerLiteral, start_pos, state.get_position());
245 true
246 }
247 else {
248 false
249 }
250 }
251 else {
252 false
253 }
254 }
255
256 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
258 let start_pos = state.get_position();
259
260 if let Some(ch) = state.peek() {
261 if ch.is_alphabetic() || ch == '_' {
262 state.advance(ch.len_utf8());
263
264 while let Some(ch) = state.peek() {
265 if ch.is_alphanumeric() || ch == '_' || ch == '\'' { state.advance(ch.len_utf8()) } else { break }
266 }
267
268 let text = state.get_text_in(oak_core::Range { start: start_pos, end: state.get_position() });
269 let kind = match text.as_ref() {
270 "axiom" => LeanTokenType::Axiom,
271 "constant" => LeanTokenType::Constant,
272 "def" => LeanTokenType::Def,
273 "example" => LeanTokenType::Example,
274 "inductive" => LeanTokenType::Inductive,
275 "lemma" => LeanTokenType::Lemma,
276 "namespace" => LeanTokenType::Namespace,
277 "open" => LeanTokenType::Open,
278 "private" => LeanTokenType::Private,
279 "protected" => LeanTokenType::Protected,
280 "section" => LeanTokenType::Section,
281 "structure" => LeanTokenType::Structure,
282 "theorem" => LeanTokenType::Theorem,
283 "universe" => LeanTokenType::Universe,
284 "variable" => LeanTokenType::Variable,
285 "variables" => LeanTokenType::Variables,
286 "end" => LeanTokenType::End,
287 "import" => LeanTokenType::Import,
288 "export" => LeanTokenType::Export,
289 "prelude" => LeanTokenType::Prelude,
290 "noncomputable" => LeanTokenType::Noncomputable,
291 "partial" => LeanTokenType::Partial,
292 "unsafe" => LeanTokenType::Unsafe,
293 "mutual" => LeanTokenType::Mutual,
294 "where" => LeanTokenType::Where,
295 "have" => LeanTokenType::Have,
296 "show" => LeanTokenType::Show,
297 "suffices" => LeanTokenType::Suffices,
298 "let" => LeanTokenType::Let,
299 "in" => LeanTokenType::In,
300 "if" => LeanTokenType::If,
301 "then" => LeanTokenType::Then,
302 "else" => LeanTokenType::Else,
303 "match" => LeanTokenType::Match,
304 "with" => LeanTokenType::With,
305 "fun" => LeanTokenType::Fun,
306 "do" => LeanTokenType::Do,
307 "for" => LeanTokenType::For,
308 "while" => LeanTokenType::While,
309 "break" => LeanTokenType::Break,
310 "continue" => LeanTokenType::Continue,
311 "return" => LeanTokenType::Return,
312 "try" => LeanTokenType::Try,
313 "catch" => LeanTokenType::Catch,
314 "finally" => LeanTokenType::Finally,
315 "throw" => LeanTokenType::Throw,
316 _ => LeanTokenType::Identifier,
317 };
318
319 state.add_token(kind, start_pos, state.get_position());
320 true
321 }
322 else {
323 false
324 }
325 }
326 else {
327 false
328 }
329 }
330
331 fn lex_operator_or_delimiter<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
333 let start_pos = state.get_position();
334
335 if let Some(ch) = state.peek() {
336 let kind = match ch {
337 '(' => LeanTokenType::LeftParen,
338 ')' => LeanTokenType::RightParen,
339 '{' => LeanTokenType::LeftBrace,
340 '}' => LeanTokenType::RightBrace,
341 '[' => LeanTokenType::LeftBracket,
342 ']' => LeanTokenType::RightBracket,
343 ',' => LeanTokenType::Comma,
344 ';' => LeanTokenType::Semicolon,
345 '+' => LeanTokenType::Plus,
346 '*' => LeanTokenType::Star,
347 '/' => LeanTokenType::Slash,
348 '%' => LeanTokenType::Percent,
349 '^' => LeanTokenType::Caret,
350 '#' => LeanTokenType::Hash,
351 '&' => LeanTokenType::Ampersand,
352 '|' => LeanTokenType::Pipe,
353 '~' => LeanTokenType::Tilde,
354 '!' => LeanTokenType::Bang,
355 '?' => LeanTokenType::Question,
356 '@' => LeanTokenType::At,
357 '$' => LeanTokenType::Dollar,
358 '<' => LeanTokenType::Lt,
359 '>' => LeanTokenType::Gt,
360 '=' => LeanTokenType::Eq,
361 _ => return false,
362 };
363
364 state.advance(ch.len_utf8());
365 state.add_token(kind, start_pos, state.get_position());
366 true
367 }
368 else {
369 false
370 }
371 }
372
373 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
374 while state.not_at_end() {
375 if self.skip_whitespace(state) || self.lex_newline(state) || self.lex_comment(state) || self.lex_string(state) || self.lex_char(state) || self.lex_number(state) || self.lex_identifier_or_keyword(state) || self.lex_operator_or_delimiter(state) {
376 continue;
377 }
378
379 let start_pos = state.get_position();
381 if let Some(ch) = state.peek() {
382 state.advance(ch.len_utf8());
383 state.add_token(LeanTokenType::Error, start_pos, state.get_position());
384 }
385 }
386
387 Ok(())
388 }
389}