1use crate::{kind::HaskellSyntaxKind, language::HaskellLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, TextEdit, lexer::LexOutput, source::Source};
3
/// Shorthand for the `oak_core` lexer state specialised to [`HaskellLanguage`];
/// the lifetime `'a` and source type `S` are forwarded unchanged.
type State<'a, S> = LexerState<'a, S, HaskellLanguage>;
5
/// Tokenizer for Haskell source text.
///
/// Holds a borrowed [`HaskellLanguage`] configuration. The field is
/// underscore-prefixed and none of the lexing routines visible in this file
/// read it.
#[derive(Clone)]
pub struct HaskellLexer<'config> {
    /// Language configuration this lexer was created with.
    _config: &'config HaskellLanguage,
}
10
11impl<'config> HaskellLexer<'config> {
12 pub fn new(config: &'config HaskellLanguage) -> Self {
13 Self { _config: config }
14 }
15
16 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
17 let start_pos = state.get_position();
18 while let Some(ch) = state.peek() {
19 if ch == ' ' || ch == '\t' {
20 state.bump();
21 }
22 else {
23 break;
24 }
25 }
26
27 if state.get_position() > start_pos {
28 state.add_token(HaskellSyntaxKind::Whitespace, start_pos, state.get_position());
29 true
30 }
31 else {
32 false
33 }
34 }
35
36 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
37 let start_pos = state.get_position();
38
39 if let Some('\n') = state.peek() {
40 state.bump();
41 state.add_token(HaskellSyntaxKind::Newline, start_pos, state.get_position());
42 true
43 }
44 else if let Some('\r') = state.peek() {
45 state.bump();
46 if let Some('\n') = state.peek() {
47 state.bump();
48 }
49 state.add_token(HaskellSyntaxKind::Newline, start_pos, state.get_position());
50 true
51 }
52 else {
53 false
54 }
55 }
56
57 fn lex_single_line_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
58 let start_pos = state.get_position();
59
60 if let Some('-') = state.peek() {
61 if let Some('-') = state.peek_next_n(1) {
62 state.advance(2);
63 while let Some(ch) = state.peek() {
64 if ch == '\n' || ch == '\r' {
65 break;
66 }
67 state.bump();
68 }
69 state.add_token(HaskellSyntaxKind::Comment, start_pos, state.get_position());
70 true
71 }
72 else {
73 false
74 }
75 }
76 else {
77 false
78 }
79 }
80
81 fn lex_multi_line_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
82 let start_pos = state.get_position();
83
84 if let Some('{') = state.peek() {
85 if let Some('-') = state.peek_next_n(1) {
86 state.advance(2);
87 let mut depth = 1;
88 while let Some(ch) = state.peek() {
89 if ch == '{' && state.peek_next_n(1) == Some('-') {
90 depth += 1;
91 state.advance(2);
92 }
93 else if ch == '-' && state.peek_next_n(1) == Some('}') {
94 depth -= 1;
95 state.advance(2);
96 if depth == 0 {
97 break;
98 }
99 }
100 else {
101 state.bump();
102 }
103 }
104 state.add_token(HaskellSyntaxKind::Comment, start_pos, state.get_position());
105 true
106 }
107 else {
108 false
109 }
110 }
111 else {
112 false
113 }
114 }
115
116 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
117 let start_pos = state.get_position();
118
119 if let Some(ch) = state.peek() {
120 if ch.is_ascii_alphabetic() || ch == '_' {
121 state.bump();
122
123 while let Some(ch) = state.peek() {
124 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '\'' {
125 state.bump();
126 }
127 else {
128 break;
129 }
130 }
131
132 let end_pos = state.get_position();
133 let text = state.get_text_in((start_pos..end_pos).into());
134 let kind = self.keyword_or_identifier(text.as_ref());
135
136 state.add_token(kind, start_pos, end_pos);
137 true
138 }
139 else {
140 false
141 }
142 }
143 else {
144 false
145 }
146 }
147
148 fn keyword_or_identifier(&self, text: &str) -> HaskellSyntaxKind {
149 match text {
150 "case" => HaskellSyntaxKind::Case,
151 "class" => HaskellSyntaxKind::Class,
152 "data" => HaskellSyntaxKind::Data,
153 "default" => HaskellSyntaxKind::Default,
154 "deriving" => HaskellSyntaxKind::Deriving,
155 "do" => HaskellSyntaxKind::Do,
156 "else" => HaskellSyntaxKind::Else,
157 "if" => HaskellSyntaxKind::If,
158 "import" => HaskellSyntaxKind::Import,
159 "in" => HaskellSyntaxKind::In,
160 "infix" => HaskellSyntaxKind::Infix,
161 "infixl" => HaskellSyntaxKind::Infixl,
162 "infixr" => HaskellSyntaxKind::Infixr,
163 "instance" => HaskellSyntaxKind::Instance,
164 "let" => HaskellSyntaxKind::Let,
165 "module" => HaskellSyntaxKind::Module,
166 "newtype" => HaskellSyntaxKind::Newtype,
167 "of" => HaskellSyntaxKind::Of,
168 "then" => HaskellSyntaxKind::Then,
169 "type" => HaskellSyntaxKind::Type,
170 "where" => HaskellSyntaxKind::Where,
171 _ => HaskellSyntaxKind::Identifier,
172 }
173 }
174
175 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
176 let start_pos = state.get_position();
177
178 if let Some(ch) = state.peek() {
179 if ch.is_ascii_digit() {
180 state.bump();
181
182 while let Some(ch) = state.peek() {
183 if ch.is_ascii_digit() {
184 state.bump();
185 }
186 else if ch == '.' {
187 state.bump();
188 while let Some(ch) = state.peek() {
189 if ch.is_ascii_digit() {
190 state.bump();
191 }
192 else {
193 break;
194 }
195 }
196 break;
197 }
198 else {
199 break;
200 }
201 }
202
203 state.add_token(HaskellSyntaxKind::Number, start_pos, state.get_position());
204 true
205 }
206 else {
207 false
208 }
209 }
210 else {
211 false
212 }
213 }
214
215 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
216 let start_pos = state.get_position();
217
218 if let Some('"') = state.peek() {
219 state.bump();
220
221 while let Some(ch) = state.peek() {
222 if ch == '"' {
223 state.bump();
224 state.add_token(HaskellSyntaxKind::StringLiteral, start_pos, state.get_position());
225 return true;
226 }
227 else if ch == '\\' {
228 state.bump();
229 if let Some(_) = state.peek() {
230 state.bump();
231 }
232 }
233 else {
234 state.bump();
235 }
236 }
237
238 state.add_token(HaskellSyntaxKind::StringLiteral, start_pos, state.get_position());
239 true
240 }
241 else {
242 false
243 }
244 }
245
246 fn lex_char<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
247 let start_pos = state.get_position();
248
249 if let Some('\'') = state.peek() {
250 state.bump();
251
252 if let Some(ch) = state.peek() {
253 if ch == '\\' {
254 state.bump();
255 if let Some(_) = state.peek() {
256 state.bump();
257 }
258 }
259 else if ch != '\'' {
260 state.bump();
261 }
262 }
263
264 if let Some('\'') = state.peek() {
265 state.bump();
266 state.add_token(HaskellSyntaxKind::CharLiteral, start_pos, state.get_position());
267 true
268 }
269 else {
270 state.add_token(HaskellSyntaxKind::CharLiteral, start_pos, state.get_position());
271 true
272 }
273 }
274 else {
275 false
276 }
277 }
278
279 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
280 let start_pos = state.get_position();
281
282 if let Some(ch) = state.peek() {
283 let token_kind = match ch {
284 '+' => {
285 state.bump();
286 if let Some('+') = state.peek() {
287 state.bump();
288 HaskellSyntaxKind::Append
289 }
290 else {
291 HaskellSyntaxKind::Plus
292 }
293 }
294 '-' => {
295 state.bump();
296 if let Some('>') = state.peek() {
297 state.bump();
298 HaskellSyntaxKind::Arrow
299 }
300 else {
301 HaskellSyntaxKind::Minus
302 }
303 }
304 '*' => {
305 state.bump();
306 HaskellSyntaxKind::Star
307 }
308 '/' => {
309 state.bump();
310 HaskellSyntaxKind::Slash
311 }
312 '=' => {
313 state.bump();
314 if let Some('=') = state.peek() {
315 state.bump();
316 HaskellSyntaxKind::Equal
317 }
318 else {
319 HaskellSyntaxKind::Assign
320 }
321 }
322 '<' => {
323 state.bump();
324 if let Some('=') = state.peek() {
325 state.bump();
326 HaskellSyntaxKind::LessEqual
327 }
328 else if let Some('-') = state.peek() {
329 state.bump();
330 HaskellSyntaxKind::LeftArrow
331 }
332 else {
333 HaskellSyntaxKind::Less
334 }
335 }
336 '>' => {
337 state.bump();
338 if let Some('=') = state.peek() {
339 state.bump();
340 HaskellSyntaxKind::GreaterEqual
341 }
342 else {
343 HaskellSyntaxKind::Greater
344 }
345 }
346 ':' => {
347 state.bump();
348 if let Some(':') = state.peek() {
349 state.bump();
350 HaskellSyntaxKind::DoubleColon
351 }
352 else {
353 HaskellSyntaxKind::Colon
354 }
355 }
356 '|' => {
357 state.bump();
358 HaskellSyntaxKind::Pipe
359 }
360 '&' => {
361 state.bump();
362 HaskellSyntaxKind::Ampersand
363 }
364 '!' => {
365 state.bump();
366 HaskellSyntaxKind::Bang
367 }
368 '?' => {
369 state.bump();
370 HaskellSyntaxKind::Question
371 }
372 ';' => {
373 state.bump();
374 HaskellSyntaxKind::Semicolon
375 }
376 ',' => {
377 state.bump();
378 HaskellSyntaxKind::Comma
379 }
380 '.' => {
381 state.bump();
382 if let Some('.') = state.peek() {
383 state.bump();
384 HaskellSyntaxKind::DoubleDot
385 }
386 else {
387 HaskellSyntaxKind::Dot
388 }
389 }
390 '$' => {
391 state.bump();
392 HaskellSyntaxKind::Dollar
393 }
394 '@' => {
395 state.bump();
396 HaskellSyntaxKind::At
397 }
398 '~' => {
399 state.bump();
400 HaskellSyntaxKind::Tilde
401 }
402 '\\' => {
403 state.bump();
404 HaskellSyntaxKind::Backslash
405 }
406 '`' => {
407 state.bump();
408 HaskellSyntaxKind::Backtick
409 }
410 _ => return false,
411 };
412
413 state.add_token(token_kind, start_pos, state.get_position());
414 true
415 }
416 else {
417 false
418 }
419 }
420
421 fn lex_delimiters<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
422 let start_pos = state.get_position();
423
424 if let Some(ch) = state.peek() {
425 let token_kind = match ch {
426 '(' => {
427 state.bump();
428 HaskellSyntaxKind::LeftParen
429 }
430 ')' => {
431 state.bump();
432 HaskellSyntaxKind::RightParen
433 }
434 '[' => {
435 state.bump();
436 HaskellSyntaxKind::LeftBracket
437 }
438 ']' => {
439 state.bump();
440 HaskellSyntaxKind::RightBracket
441 }
442 '{' => {
443 state.bump();
444 HaskellSyntaxKind::LeftBrace
445 }
446 '}' => {
447 state.bump();
448 HaskellSyntaxKind::RightBrace
449 }
450 _ => return false,
451 };
452
453 state.add_token(token_kind, start_pos, state.get_position());
454 true
455 }
456 else {
457 false
458 }
459 }
460}
461
462impl<'config> Lexer<HaskellLanguage> for HaskellLexer<'config> {
463 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<HaskellLanguage>) -> LexOutput<HaskellLanguage> {
464 let mut state = State::new(source);
465
466 while state.not_at_end() {
467 let safe_point = state.get_position();
468 if self.skip_whitespace(&mut state) {
469 continue;
470 }
471
472 if self.lex_newline(&mut state) {
473 continue;
474 }
475
476 if self.lex_single_line_comment(&mut state) {
477 continue;
478 }
479
480 if self.lex_multi_line_comment(&mut state) {
481 continue;
482 }
483
484 if self.lex_identifier_or_keyword(&mut state) {
485 continue;
486 }
487
488 if self.lex_number(&mut state) {
489 continue;
490 }
491
492 if self.lex_string(&mut state) {
493 continue;
494 }
495
496 if self.lex_char(&mut state) {
497 continue;
498 }
499
500 if self.lex_operators(&mut state) {
501 continue;
502 }
503
504 if self.lex_delimiters(&mut state) {
505 continue;
506 }
507
508 let start_pos = state.get_position();
510 if state.peek().is_some() {
511 state.advance(1);
512 state.add_token(HaskellSyntaxKind::Error, start_pos, state.get_position());
513 }
514
515 state.advance_if_dead_lock(safe_point);
516 }
517
518 let pos = state.get_position();
520 state.add_token(HaskellSyntaxKind::Eof, pos, pos);
521
522 state.finish_with_cache(Ok(()), cache)
523 }
524}