1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5use crate::{language::LeanLanguage, lexer::token_type::LeanTokenType};
9use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
10
/// Per-run lexer state, specialized to [`LeanLanguage`]; created fresh for
/// each `lex` call and consumed by `finish_with_cache`.
pub(crate) type State<'a, S> = LexerState<'a, S, LeanLanguage>;
12
/// Hand-written lexer for the Lean language.
///
/// The lexer itself is stateless between runs — all per-run bookkeeping
/// lives in [`State`] — so it only borrows the shared language
/// configuration.
#[derive(Debug, Clone)]
pub struct LeanLexer<'config> {
    // NOTE(review): stored but never read by any method visible in this
    // file — presumably reserved for config-driven lexing; confirm.
    config: &'config LeanLanguage,
}
18
19impl<'config> Lexer<LeanLanguage> for LeanLexer<'config> {
20 fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<LeanLanguage>) -> LexOutput<LeanLanguage> {
21 let mut state = State::new(text);
22 let result = self.run(&mut state);
23 if result.is_ok() {
24 state.add_eof()
25 }
26 state.finish_with_cache(result, cache)
27 }
28}
29
30impl<'config> LeanLexer<'config> {
31 pub fn new(config: &'config LeanLanguage) -> Self {
33 Self { config }
34 }
35
36 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
38 let start_pos = state.get_position();
39
40 while let Some(ch) = state.peek() {
41 if ch == ' ' || ch == '\t' { state.advance(ch.len_utf8()) } else { break }
42 }
43
44 if state.get_position() > start_pos {
45 state.add_token(LeanTokenType::Whitespace, start_pos, state.get_position());
46 true
47 }
48 else {
49 false
50 }
51 }
52
53 fn lex_newline<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
55 let start_pos = state.get_position();
56
57 if let Some('\n') = state.peek() {
58 state.advance(1);
59 state.add_token(LeanTokenType::Newline, start_pos, state.get_position());
60 true
61 }
62 else if let Some('\r') = state.peek() {
63 state.advance(1);
64 if let Some('\n') = state.peek() {
65 state.advance(1)
66 }
67 state.add_token(LeanTokenType::Newline, start_pos, state.get_position());
68 true
69 }
70 else {
71 false
72 }
73 }
74
75 fn lex_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
77 let start_pos = state.get_position();
78
79 if let Some('-') = state.peek() {
80 state.advance(1);
81 if let Some('-') = state.peek() {
82 state.advance(1);
84 while let Some(ch) = state.peek() {
85 if ch == '\n' || ch == '\r' {
86 break;
87 }
88 state.advance(ch.len_utf8())
89 }
90 state.add_token(LeanTokenType::Comment, start_pos, state.get_position());
91 true
92 }
93 else {
94 state.set_position(start_pos);
96 false
97 }
98 }
99 else if let Some('/') = state.peek() {
100 state.advance(1);
101 if let Some('-') = state.peek() {
102 state.advance(1);
104 let mut depth = 1;
105 while depth > 0 && state.not_at_end() {
106 if let Some('/') = state.peek() {
107 state.advance(1);
108 if let Some('-') = state.peek() {
109 state.advance(1);
110 depth += 1
111 }
112 }
113 else if let Some('-') = state.peek() {
114 state.advance(1);
115 if let Some('/') = state.peek() {
116 state.advance(1);
117 depth -= 1
118 }
119 }
120 else if let Some(ch) = state.peek() {
121 state.advance(ch.len_utf8())
122 }
123 else {
124 break;
125 }
126 }
127 state.add_token(LeanTokenType::Comment, start_pos, state.get_position());
128 true
129 }
130 else {
131 state.set_position(start_pos);
133 false
134 }
135 }
136 else {
137 false
138 }
139 }
140
141 fn lex_string<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
143 let start_pos = state.get_position();
144
145 if let Some('"') = state.peek() {
146 state.advance(1);
147
148 while let Some(ch) = state.peek() {
149 if ch == '"' {
150 state.advance(1);
151 break;
152 }
153 else if ch == '\\' {
154 state.advance(1);
155 if let Some(escaped) = state.peek() {
156 state.advance(escaped.len_utf8())
157 }
158 }
159 else if ch == '\n' || ch == '\r' {
160 break; }
162 else {
163 state.advance(ch.len_utf8())
164 }
165 }
166 state.add_token(LeanTokenType::StringLiteral, start_pos, state.get_position());
167 true
168 }
169 else {
170 false
171 }
172 }
173
174 fn lex_char<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
176 let start_pos = state.get_position();
177
178 if let Some('\'') = state.peek() {
179 state.advance(1);
180
181 while let Some(ch) = state.peek() {
182 if ch == '\'' {
183 state.advance(1);
184 break;
185 }
186 else if ch == '\\' {
187 state.advance(1);
188 if let Some(escaped) = state.peek() {
189 state.advance(escaped.len_utf8())
190 }
191 }
192 else if ch == '\n' || ch == '\r' {
193 break;
194 }
195 else {
196 state.advance(ch.len_utf8())
197 }
198 }
199 state.add_token(LeanTokenType::CharLiteral, start_pos, state.get_position());
200 true
201 }
202 else {
203 false
204 }
205 }
206
207 fn lex_number<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
209 let start_pos = state.get_position();
210
211 if let Some(ch) = state.peek() {
212 if ch.is_ascii_digit() {
213 while let Some(ch) = state.peek() {
215 if ch.is_ascii_digit() { state.advance(ch.len_utf8()) } else { break }
216 }
217
218 if let Some('.') = state.peek() {
220 state.advance(1); if let Some(next_char) = state.peek() {
222 if next_char.is_ascii_digit() {
223 while let Some(ch) = state.peek() {
224 if ch.is_ascii_digit() { state.advance(ch.len_utf8()) } else { break }
225 }
226 }
227 }
228 }
229
230 if let Some(ch) = state.peek() {
232 if ch == 'e' || ch == 'E' {
233 state.advance(1);
234 if let Some(sign) = state.peek() {
235 if sign == '+' || sign == '-' {
236 state.advance(1)
237 }
238 }
239 while let Some(ch) = state.peek() {
240 if ch.is_ascii_digit() { state.advance(ch.len_utf8()) } else { break }
241 }
242 }
243 }
244
245 state.add_token(LeanTokenType::IntegerLiteral, start_pos, state.get_position());
246 true
247 }
248 else {
249 false
250 }
251 }
252 else {
253 false
254 }
255 }
256
257 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
259 let start_pos = state.get_position();
260
261 if let Some(ch) = state.peek() {
262 if ch.is_alphabetic() || ch == '_' {
263 state.advance(ch.len_utf8());
264
265 while let Some(ch) = state.peek() {
266 if ch.is_alphanumeric() || ch == '_' || ch == '\'' { state.advance(ch.len_utf8()) } else { break }
267 }
268
269 let text = state.get_text_in(oak_core::Range { start: start_pos, end: state.get_position() });
270 let kind = match text.as_ref() {
271 "axiom" => LeanTokenType::Axiom,
272 "constant" => LeanTokenType::Constant,
273 "def" => LeanTokenType::Def,
274 "example" => LeanTokenType::Example,
275 "inductive" => LeanTokenType::Inductive,
276 "lemma" => LeanTokenType::Lemma,
277 "namespace" => LeanTokenType::Namespace,
278 "open" => LeanTokenType::Open,
279 "private" => LeanTokenType::Private,
280 "protected" => LeanTokenType::Protected,
281 "section" => LeanTokenType::Section,
282 "structure" => LeanTokenType::Structure,
283 "theorem" => LeanTokenType::Theorem,
284 "universe" => LeanTokenType::Universe,
285 "variable" => LeanTokenType::Variable,
286 "variables" => LeanTokenType::Variables,
287 "end" => LeanTokenType::End,
288 "import" => LeanTokenType::Import,
289 "export" => LeanTokenType::Export,
290 "prelude" => LeanTokenType::Prelude,
291 "noncomputable" => LeanTokenType::Noncomputable,
292 "partial" => LeanTokenType::Partial,
293 "unsafe" => LeanTokenType::Unsafe,
294 "mutual" => LeanTokenType::Mutual,
295 "where" => LeanTokenType::Where,
296 "have" => LeanTokenType::Have,
297 "show" => LeanTokenType::Show,
298 "suffices" => LeanTokenType::Suffices,
299 "let" => LeanTokenType::Let,
300 "in" => LeanTokenType::In,
301 "if" => LeanTokenType::If,
302 "then" => LeanTokenType::Then,
303 "else" => LeanTokenType::Else,
304 "match" => LeanTokenType::Match,
305 "with" => LeanTokenType::With,
306 "fun" => LeanTokenType::Fun,
307 "do" => LeanTokenType::Do,
308 "for" => LeanTokenType::For,
309 "while" => LeanTokenType::While,
310 "break" => LeanTokenType::Break,
311 "continue" => LeanTokenType::Continue,
312 "return" => LeanTokenType::Return,
313 "try" => LeanTokenType::Try,
314 "catch" => LeanTokenType::Catch,
315 "finally" => LeanTokenType::Finally,
316 "throw" => LeanTokenType::Throw,
317 _ => LeanTokenType::Identifier,
318 };
319
320 state.add_token(kind, start_pos, state.get_position());
321 true
322 }
323 else {
324 false
325 }
326 }
327 else {
328 false
329 }
330 }
331
332 fn lex_operator_or_delimiter<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
334 let start_pos = state.get_position();
335
336 if let Some(ch) = state.peek() {
337 let kind = match ch {
338 '(' => LeanTokenType::LeftParen,
339 ')' => LeanTokenType::RightParen,
340 '{' => LeanTokenType::LeftBrace,
341 '}' => LeanTokenType::RightBrace,
342 '[' => LeanTokenType::LeftBracket,
343 ']' => LeanTokenType::RightBracket,
344 ',' => LeanTokenType::Comma,
345 ';' => LeanTokenType::Semicolon,
346 '+' => LeanTokenType::Plus,
347 '*' => LeanTokenType::Star,
348 '/' => LeanTokenType::Slash,
349 '%' => LeanTokenType::Percent,
350 '^' => LeanTokenType::Caret,
351 '#' => LeanTokenType::Hash,
352 '&' => LeanTokenType::Ampersand,
353 '|' => LeanTokenType::Pipe,
354 '~' => LeanTokenType::Tilde,
355 '!' => LeanTokenType::Bang,
356 '?' => LeanTokenType::Question,
357 '@' => LeanTokenType::At,
358 '$' => LeanTokenType::Dollar,
359 '<' => LeanTokenType::Lt,
360 '>' => LeanTokenType::Gt,
361 '=' => LeanTokenType::Eq,
362 _ => return false,
363 };
364
365 state.advance(ch.len_utf8());
366 state.add_token(kind, start_pos, state.get_position());
367 true
368 }
369 else {
370 false
371 }
372 }
373
374 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
375 while state.not_at_end() {
376 if self.skip_whitespace(state) || self.lex_newline(state) || self.lex_comment(state) || self.lex_string(state) || self.lex_char(state) || self.lex_number(state) || self.lex_identifier_or_keyword(state) || self.lex_operator_or_delimiter(state) {
377 continue;
378 }
379
380 let start_pos = state.get_position();
382 if let Some(ch) = state.peek() {
383 state.advance(ch.len_utf8());
384 state.add_token(LeanTokenType::Error, start_pos, state.get_position());
385 }
386 }
387
388 Ok(())
389 }
390}