1use crate::{kind::JavaSyntaxKind, language::JavaLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
3
/// Shorthand for `oak_core`'s `LexerState` fixed to `JavaLanguage`; every lexing
/// routine in this file receives a `&mut State` to read from and push tokens into.
type State<'a, S> = LexerState<'a, S, JavaLanguage>;
5
6#[derive(Clone)]
7pub struct JavaLexer<'config> {
8 _config: &'config JavaLanguage,
9}
10
11impl<'config> Lexer<JavaLanguage> for JavaLexer<'config> {
12 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<JavaLanguage>) -> LexOutput<JavaLanguage> {
13 let mut state = State::new(source);
14 let result = self.run(&mut state);
15 if result.is_ok() {
16 state.add_eof();
17 }
18 state.finish_with_cache(result, cache)
19 }
20}
21
22impl<'config> JavaLexer<'config> {
23 pub fn new(config: &'config JavaLanguage) -> Self {
24 Self { _config: config }
25 }
26
27 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
29 while state.not_at_end() {
30 let safe_point = state.get_position();
31
32 if self.skip_whitespace(state) {
33 continue;
34 }
35
36 if self.lex_newline(state) {
37 continue;
38 }
39
40 if self.skip_comment(state) {
41 continue;
42 }
43
44 if self.lex_string_literal(state) {
45 continue;
46 }
47
48 if self.lex_char_literal(state) {
49 continue;
50 }
51
52 if self.lex_number_literal(state) {
53 continue;
54 }
55
56 if self.lex_identifier_or_keyword(state) {
57 continue;
58 }
59
60 if self.lex_operator_or_delimiter(state) {
61 continue;
62 }
63
64 let start_pos = state.get_position();
66 if let Some(ch) = state.peek() {
67 state.advance(ch.len_utf8());
68 state.add_token(JavaSyntaxKind::Error, start_pos, state.get_position());
69 }
70
71 state.advance_if_dead_lock(safe_point);
72 }
73
74 Ok(())
75 }
76
77 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
79 let start = state.get_position();
80
81 while let Some(ch) = state.peek() {
82 if ch == ' ' || ch == '\t' || ch == '\r' {
83 state.advance(ch.len_utf8());
84 }
85 else {
86 break;
87 }
88 }
89
90 if state.get_position() > start {
91 state.add_token(JavaSyntaxKind::Whitespace, start, state.get_position());
92 return true;
93 }
94 false
95 }
96
97 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
99 let start = state.get_position();
100
101 if let Some('\n') = state.peek() {
102 state.advance(1);
103 state.add_token(JavaSyntaxKind::Whitespace, start, state.get_position());
104 true
105 }
106 else {
107 false
108 }
109 }
110
111 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
113 let start = state.get_position();
114
115 if state.peek() == Some('/') && state.peek_next_n(1) == Some('/') {
117 state.advance(2);
118 while let Some(ch) = state.peek() {
119 if ch == '\n' {
120 break;
121 }
122 state.advance(ch.len_utf8());
123 }
124 state.add_token(JavaSyntaxKind::LineComment, start, state.get_position());
125 return true;
126 }
127
128 if state.peek() == Some('/') && state.peek_next_n(1) == Some('*') {
130 state.advance(2);
131 while let Some(ch) = state.peek() {
132 if ch == '*' && state.peek_next_n(1) == Some('/') {
133 state.advance(2);
134 break;
135 }
136 state.advance(ch.len_utf8());
137 }
138 state.add_token(JavaSyntaxKind::BlockComment, start, state.get_position());
139 return true;
140 }
141
142 false
143 }
144
145 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
147 let start = state.get_position();
148
149 if let Some('"') = state.peek() {
150 state.advance(1);
151
152 while let Some(ch) = state.peek() {
153 if ch == '"' {
154 state.advance(1);
155 break;
156 }
157 else if ch == '\\' {
158 state.advance(1);
159 if let Some(escaped) = state.peek() {
160 state.advance(escaped.len_utf8());
161 }
162 }
163 else if ch == '\n' {
164 break;
166 }
167 else {
168 state.advance(ch.len_utf8());
169 }
170 }
171
172 state.add_token(JavaSyntaxKind::StringLiteral, start, state.get_position());
173 return true;
174 }
175
176 false
177 }
178
179 fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
181 let start = state.get_position();
182
183 if let Some('\'') = state.peek() {
184 state.advance(1);
185
186 if let Some(ch) = state.peek() {
187 if ch == '\\' {
188 state.advance(1);
189 if let Some(escaped) = state.peek() {
190 state.advance(escaped.len_utf8());
191 }
192 }
193 else if ch != '\'' && ch != '\n' {
194 state.advance(ch.len_utf8());
195 }
196 }
197
198 if let Some('\'') = state.peek() {
199 state.advance(1);
200 }
201
202 state.add_token(JavaSyntaxKind::CharacterLiteral, start, state.get_position());
203 return true;
204 }
205
206 false
207 }
208
209 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
211 let start = state.get_position();
212
213 if let Some(ch) = state.peek() {
214 if ch.is_ascii_digit() {
215 while let Some(ch) = state.peek() {
217 if ch.is_ascii_digit() {
218 state.advance(ch.len_utf8());
219 }
220 else {
221 break;
222 }
223 }
224
225 if state.peek() == Some('.') && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit()) {
227 state.advance(1); while let Some(ch) = state.peek() {
229 if ch.is_ascii_digit() {
230 state.advance(ch.len_utf8());
231 }
232 else {
233 break;
234 }
235 }
236 }
237
238 if let Some(ch) = state.peek() {
240 if ch == 'e' || ch == 'E' {
241 state.advance(1);
242 if let Some(sign) = state.peek() {
243 if sign == '+' || sign == '-' {
244 state.advance(1);
245 }
246 }
247 while let Some(ch) = state.peek() {
248 if ch.is_ascii_digit() {
249 state.advance(ch.len_utf8());
250 }
251 else {
252 break;
253 }
254 }
255 }
256 }
257
258 if let Some(suffix) = state.peek() {
260 if suffix == 'f' || suffix == 'F' || suffix == 'd' || suffix == 'D' || suffix == 'l' || suffix == 'L' {
261 state.advance(1);
262 }
263 }
264
265 state.add_token(JavaSyntaxKind::IntegerLiteral, start, state.get_position());
266 return true;
267 }
268 }
269
270 false
271 }
272
273 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
275 let start = state.get_position();
276
277 if let Some(ch) = state.peek() {
278 if ch.is_ascii_alphabetic() || ch == '_' || ch == '$' {
279 state.advance(ch.len_utf8());
280
281 while let Some(ch) = state.peek() {
282 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
283 state.advance(ch.len_utf8());
284 }
285 else {
286 break;
287 }
288 }
289
290 let text = state.get_text_in((start..state.get_position()).into());
291 let token_kind = self.classify_identifier(text.as_ref());
292
293 state.add_token(token_kind, start, state.get_position());
294 true
295 }
296 else {
297 false
298 }
299 }
300 else {
301 false
302 }
303 }
304
305 fn classify_identifier(&self, text: &str) -> JavaSyntaxKind {
307 match text {
308 "abstract" => JavaSyntaxKind::Abstract,
309 "assert" => JavaSyntaxKind::Assert,
310 "boolean" => JavaSyntaxKind::Boolean,
311 "break" => JavaSyntaxKind::Break,
312 "byte" => JavaSyntaxKind::Byte,
313 "case" => JavaSyntaxKind::Case,
314 "catch" => JavaSyntaxKind::Catch,
315 "char" => JavaSyntaxKind::Char,
316 "class" => JavaSyntaxKind::Class,
317 "const" => JavaSyntaxKind::Const,
318 "continue" => JavaSyntaxKind::Continue,
319 "default" => JavaSyntaxKind::Default,
320 "do" => JavaSyntaxKind::Do,
321 "double" => JavaSyntaxKind::Double,
322 "else" => JavaSyntaxKind::Else,
323 "enum" => JavaSyntaxKind::Enum,
324 "extends" => JavaSyntaxKind::Extends,
325 "final" => JavaSyntaxKind::Final,
326 "finally" => JavaSyntaxKind::Finally,
327 "float" => JavaSyntaxKind::Float,
328 "for" => JavaSyntaxKind::For,
329 "goto" => JavaSyntaxKind::Goto,
330 "if" => JavaSyntaxKind::If,
331 "implements" => JavaSyntaxKind::Implements,
332 "import" => JavaSyntaxKind::Import,
333 "instanceof" => JavaSyntaxKind::Instanceof,
334 "int" => JavaSyntaxKind::Int,
335 "interface" => JavaSyntaxKind::Interface,
336 "long" => JavaSyntaxKind::Long,
337 "native" => JavaSyntaxKind::Native,
338 "new" => JavaSyntaxKind::New,
339 "package" => JavaSyntaxKind::Package,
340 "private" => JavaSyntaxKind::Private,
341 "protected" => JavaSyntaxKind::Protected,
342 "public" => JavaSyntaxKind::Public,
343 "return" => JavaSyntaxKind::Return,
344 "short" => JavaSyntaxKind::Short,
345 "static" => JavaSyntaxKind::Static,
346 "strictfp" => JavaSyntaxKind::Strictfp,
347 "super" => JavaSyntaxKind::Super,
348 "switch" => JavaSyntaxKind::Switch,
349 "synchronized" => JavaSyntaxKind::Synchronized,
350 "this" => JavaSyntaxKind::This,
351 "throw" => JavaSyntaxKind::Throw,
352 "throws" => JavaSyntaxKind::Throws,
353 "transient" => JavaSyntaxKind::Transient,
354 "try" => JavaSyntaxKind::Try,
355 "void" => JavaSyntaxKind::Void,
356 "volatile" => JavaSyntaxKind::Volatile,
357 "while" => JavaSyntaxKind::While,
358 "true" | "false" => JavaSyntaxKind::BooleanLiteral,
359 "null" => JavaSyntaxKind::NullLiteral,
360 _ => JavaSyntaxKind::Identifier,
361 }
362 }
363
364 fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
366 let start = state.get_position();
367
368 if let Some(ch) = state.peek() {
369 let token_kind = match ch {
370 '+' => {
371 state.advance(1);
372 if state.peek() == Some('+') {
373 state.advance(1);
374 JavaSyntaxKind::PlusPlus
375 }
376 else if state.peek() == Some('=') {
377 state.advance(1);
378 JavaSyntaxKind::PlusEquals
379 }
380 else {
381 JavaSyntaxKind::Plus
382 }
383 }
384 '-' => {
385 state.advance(1);
386 if state.peek() == Some('-') {
387 state.advance(1);
388 JavaSyntaxKind::MinusMinus
389 }
390 else if state.peek() == Some('=') {
391 state.advance(1);
392 JavaSyntaxKind::MinusEquals
393 }
394 else {
395 JavaSyntaxKind::Minus
396 }
397 }
398 '*' => {
399 state.advance(1);
400 if state.peek() == Some('=') {
401 state.advance(1);
402 JavaSyntaxKind::AsteriskEquals
403 }
404 else {
405 JavaSyntaxKind::Asterisk
406 }
407 }
408 '/' => {
409 state.advance(1);
410 if state.peek() == Some('=') {
411 state.advance(1);
412 JavaSyntaxKind::SlashEquals
413 }
414 else {
415 JavaSyntaxKind::Slash
416 }
417 }
418 '%' => {
419 state.advance(1);
420 if state.peek() == Some('=') {
421 state.advance(1);
422 JavaSyntaxKind::PercentEquals
423 }
424 else {
425 JavaSyntaxKind::Percent
426 }
427 }
428 '=' => {
429 state.advance(1);
430 if state.peek() == Some('=') {
431 state.advance(1);
432 JavaSyntaxKind::Equals
433 }
434 else {
435 JavaSyntaxKind::Assign
436 }
437 }
438 '!' => {
439 state.advance(1);
440 if state.peek() == Some('=') {
441 state.advance(1);
442 JavaSyntaxKind::BangEquals
443 }
444 else {
445 JavaSyntaxKind::Bang
446 }
447 }
448 '<' => {
449 state.advance(1);
450 if state.peek() == Some('=') {
451 state.advance(1);
452 JavaSyntaxKind::LessThanEquals
453 }
454 else if state.peek() == Some('<') {
455 state.advance(1);
456 if state.peek() == Some('=') {
457 state.advance(1);
458 JavaSyntaxKind::LeftShiftEquals
459 }
460 else {
461 JavaSyntaxKind::LeftShift
462 }
463 }
464 else {
465 JavaSyntaxKind::LessThan
466 }
467 }
468 '>' => {
469 state.advance(1);
470 if state.peek() == Some('=') {
471 state.advance(1);
472 JavaSyntaxKind::GreaterThanEquals
473 }
474 else if state.peek() == Some('>') {
475 state.advance(1);
476 if state.peek() == Some('>') {
477 state.advance(1);
478 if state.peek() == Some('=') {
479 state.advance(1);
480 JavaSyntaxKind::UnsignedRightShiftEquals
481 }
482 else {
483 JavaSyntaxKind::UnsignedRightShift
484 }
485 }
486 else if state.peek() == Some('=') {
487 state.advance(1);
488 JavaSyntaxKind::RightShiftEquals
489 }
490 else {
491 JavaSyntaxKind::RightShift
492 }
493 }
494 else {
495 JavaSyntaxKind::GreaterThan
496 }
497 }
498 '&' => {
499 state.advance(1);
500 if state.peek() == Some('&') {
501 state.advance(1);
502 JavaSyntaxKind::AmpersandAmpersand
503 }
504 else if state.peek() == Some('=') {
505 state.advance(1);
506 JavaSyntaxKind::AmpersandEquals
507 }
508 else {
509 JavaSyntaxKind::Ampersand
510 }
511 }
512 '|' => {
513 state.advance(1);
514 if state.peek() == Some('|') {
515 state.advance(1);
516 JavaSyntaxKind::PipePipe
517 }
518 else if state.peek() == Some('=') {
519 state.advance(1);
520 JavaSyntaxKind::PipeEquals
521 }
522 else {
523 JavaSyntaxKind::Pipe
524 }
525 }
526 '^' => {
527 state.advance(1);
528 if state.peek() == Some('=') {
529 state.advance(1);
530 JavaSyntaxKind::CaretEquals
531 }
532 else {
533 JavaSyntaxKind::Caret
534 }
535 }
536 '~' => {
537 state.advance(1);
538 JavaSyntaxKind::Tilde
539 }
540 '?' => {
541 state.advance(1);
542 JavaSyntaxKind::Question
543 }
544 ':' => {
545 state.advance(1);
546 JavaSyntaxKind::Colon
547 }
548 ';' => {
549 state.advance(1);
550 JavaSyntaxKind::Semicolon
551 }
552 ',' => {
553 state.advance(1);
554 JavaSyntaxKind::Comma
555 }
556 '.' => {
557 state.advance(1);
558 if state.peek() == Some('.') && state.peek_next_n(1) == Some('.') {
559 state.advance(2);
560 JavaSyntaxKind::Ellipsis
561 }
562 else {
563 JavaSyntaxKind::Dot
564 }
565 }
566 '(' => {
567 state.advance(1);
568 JavaSyntaxKind::LeftParen
569 }
570 ')' => {
571 state.advance(1);
572 JavaSyntaxKind::RightParen
573 }
574 '{' => {
575 state.advance(1);
576 JavaSyntaxKind::LeftBrace
577 }
578 '}' => {
579 state.advance(1);
580 JavaSyntaxKind::RightBrace
581 }
582 '[' => {
583 state.advance(1);
584 JavaSyntaxKind::LeftBracket
585 }
586 ']' => {
587 state.advance(1);
588 JavaSyntaxKind::RightBracket
589 }
590 '@' => {
591 state.advance(1);
592 JavaSyntaxKind::At
593 }
594 _ => return false,
595 };
596
597 state.add_token(token_kind, start, state.get_position());
598 true
599 }
600 else {
601 false
602 }
603 }
604}