1#![doc = include_str!("readme.md")]
2
3use oak_core::Source;
4pub mod token_type;
5
6use crate::{language::GroovyLanguage, lexer::token_type::GroovyTokenType};
7use oak_core::{
8 Lexer, LexerCache, LexerState, OakError,
9 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
10};
11use std::sync::LazyLock;
12
13pub(crate) type State<'a, S> = LexerState<'a, S, GroovyLanguage>;
14
15static GROOVY_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
16static GROOVY_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: false });
17static GROOVY_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
18static GROOVY_CHAR: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
19
20#[derive(Clone)]
22pub struct GroovyLexer<'config> {
23 config: &'config GroovyLanguage,
24}
25
26impl<'config> Lexer<GroovyLanguage> for GroovyLexer<'config> {
27 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<GroovyLanguage>) -> LexOutput<GroovyLanguage> {
28 let mut state = LexerState::new(source);
29 let result = self.run(&mut state);
30 if result.is_ok() {
31 state.add_eof();
32 }
33 state.finish_with_cache(result, cache)
34 }
35}
36
37impl<'config> GroovyLexer<'config> {
38 pub fn new(config: &'config GroovyLanguage) -> Self {
40 Self { config }
41 }
42
43 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
45 while state.not_at_end() {
46 let safe_point = state.get_position();
47
48 if self.skip_whitespace(state) {
49 continue;
50 }
51
52 if self.skip_comment(state) {
53 continue;
54 }
55
56 if self.lex_string_literal(state) {
57 continue;
58 }
59
60 if self.lex_char_literal(state) {
61 continue;
62 }
63
64 if self.lex_number_literal(state) {
65 continue;
66 }
67
68 if self.lex_identifier_or_keyword(state) {
69 continue;
70 }
71
72 if self.lex_operators(state) {
73 continue;
74 }
75
76 if self.lex_single_char_tokens(state) {
77 continue;
78 }
79
80 state.advance_if_dead_lock(safe_point);
81 }
82
83 Ok(())
84 }
85
86 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
88 GROOVY_WHITESPACE.scan(state, GroovyTokenType::Whitespace)
89 }
90
91 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
93 if GROOVY_COMMENT.scan(state, GroovyTokenType::Comment, GroovyTokenType::Comment) {
95 return true;
96 }
97
98 false
99 }
100
101 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
103 if GROOVY_STRING.scan(state, GroovyTokenType::StringLiteral) {
105 return true;
106 }
107
108 if state.consume_if_starts_with("\"\"\"") {
110 let start = state.get_position() - 3;
111
112 while state.not_at_end() {
113 if state.consume_if_starts_with("\"\"\"") {
114 break;
115 }
116 if let Some(ch) = state.peek() {
117 state.advance(ch.len_utf8());
118 }
119 }
120
121 let end = state.get_position();
122 state.add_token(GroovyTokenType::StringLiteral, start, end);
123 return true;
124 }
125
126 if state.consume_if_starts_with("$/") {
128 let start = state.get_position() - 2;
129
130 while state.not_at_end() {
131 if state.consume_if_starts_with("/$") {
132 break;
133 }
134 if let Some(ch) = state.peek() {
135 state.advance(ch.len_utf8());
136 }
137 }
138
139 let end = state.get_position();
140 state.add_token(GroovyTokenType::StringLiteral, start, end);
141 return true;
142 }
143
144 false
145 }
146
147 fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
149 GROOVY_CHAR.scan(state, GroovyTokenType::CharLiteral)
150 }
151
152 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
154 let start = state.get_position();
155 let mut has_digits = false;
156 let mut _is_float = false;
157
158 if state.consume_if_starts_with("-") {
160 }
162
163 if state.consume_if_starts_with("0x") || state.consume_if_starts_with("0X") {
165 while let Some(ch) = state.peek() {
166 if ch.is_ascii_hexdigit() {
167 state.advance(ch.len_utf8());
168 has_digits = true;
169 }
170 else {
171 break;
172 }
173 }
174 }
175 else if state.peek() == Some('0') {
177 state.advance(1);
178 has_digits = true;
179 while let Some(ch) = state.peek() {
180 if ch >= '0' && ch <= '7' {
181 state.advance(ch.len_utf8());
182 }
183 else {
184 break;
185 }
186 }
187 }
188 else {
190 while let Some(ch) = state.peek() {
192 if ch.is_ascii_digit() {
193 state.advance(ch.len_utf8());
194 has_digits = true;
195 }
196 else {
197 break;
198 }
199 }
200
201 if state.peek() == Some('.') && has_digits {
203 if let Some(next_ch) = state.peek_next_n(1) {
204 if next_ch.is_ascii_digit() {
205 state.advance(1); _is_float = true;
207
208 while let Some(ch) = state.peek() {
209 if ch.is_ascii_digit() {
210 state.advance(ch.len_utf8());
211 }
212 else {
213 break;
214 }
215 }
216 }
217 }
218 }
219
220 if let Some(ch) = state.peek() {
222 if (ch == 'e' || ch == 'E') && has_digits {
223 state.advance(1);
224 _is_float = true;
225
226 if let Some(next) = state.peek() {
228 if next == '+' || next == '-' {
229 state.advance(1);
230 }
231 }
232
233 let mut exp_digits = false;
235 while let Some(ch) = state.peek() {
236 if ch.is_ascii_digit() {
237 state.advance(ch.len_utf8());
238 exp_digits = true;
239 }
240 else {
241 break;
242 }
243 }
244
245 if !exp_digits {
246 return false;
248 }
249 }
250 }
251 }
252
253 if has_digits {
255 if let Some(ch) = state.peek() {
256 if matches!(ch, 'G' | 'g' | 'L' | 'l' | 'F' | 'f' | 'D' | 'd') {
257 state.advance(ch.len_utf8());
258 _is_float = matches!(ch, 'F' | 'f' | 'D' | 'd' | 'G' | 'g');
259 }
260 }
261 }
262
263 if has_digits {
264 let end = state.get_position();
265 let kind = if _is_float { GroovyTokenType::FloatLiteral } else { GroovyTokenType::IntLiteral };
266 state.add_token(kind, start, end);
267 true
268 }
269 else {
270 false
271 }
272 }
273
274 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
276 let start = state.get_position();
277
278 if let Some(first_ch) = state.peek() {
280 if !first_ch.is_alphabetic() && first_ch != '_' && first_ch != '$' {
281 return false;
282 }
283
284 state.advance(first_ch.len_utf8());
285
286 while let Some(ch) = state.peek() {
288 if ch.is_alphanumeric() || ch == '_' || ch == '$' { state.advance(ch.len_utf8()) } else { break }
289 }
290
291 let end = state.get_position();
292 let text = state.get_text_in((start..end).into());
293 let kind = self.keyword_or_identifier(text.as_ref());
294 state.add_token(kind, start, end);
295 true
296 }
297 else {
298 false
299 }
300 }
301
302 fn keyword_or_identifier(&self, text: &str) -> GroovyTokenType {
304 match text {
305 "abstract" => GroovyTokenType::AbstractKeyword,
307 "as" => GroovyTokenType::AsKeyword,
308 "assert" => GroovyTokenType::AssertKeyword,
309 "break" => GroovyTokenType::BreakKeyword,
310 "case" => GroovyTokenType::CaseKeyword,
311 "catch" => GroovyTokenType::CatchKeyword,
312 "class" => GroovyTokenType::ClassKeyword,
313 "const" => GroovyTokenType::ConstKeyword,
314 "continue" => GroovyTokenType::ContinueKeyword,
315 "def" => GroovyTokenType::DefKeyword,
316 "default" => GroovyTokenType::DefaultKeyword,
317 "do" => GroovyTokenType::DoKeyword,
318 "else" => GroovyTokenType::ElseKeyword,
319 "enum" => GroovyTokenType::EnumKeyword,
320 "extends" => GroovyTokenType::ExtendsKeyword,
321 "final" => GroovyTokenType::FinalKeyword,
322 "finally" => GroovyTokenType::FinallyKeyword,
323 "for" => GroovyTokenType::ForKeyword,
324 "goto" => GroovyTokenType::GotoKeyword,
325 "if" => GroovyTokenType::IfKeyword,
326 "implements" => GroovyTokenType::ImplementsKeyword,
327 "import" => GroovyTokenType::ImportKeyword,
328 "in" => GroovyTokenType::InKeyword,
329 "instanceof" => GroovyTokenType::InstanceofKeyword,
330 "interface" => GroovyTokenType::InterfaceKeyword,
331 "native" => GroovyTokenType::NativeKeyword,
332 "new" => GroovyTokenType::NewKeyword,
333 "package" => GroovyTokenType::PackageKeyword,
334 "private" => GroovyTokenType::PrivateKeyword,
335 "protected" => GroovyTokenType::ProtectedKeyword,
336 "public" => GroovyTokenType::PublicKeyword,
337 "return" => GroovyTokenType::ReturnKeyword,
338 "static" => GroovyTokenType::StaticKeyword,
339 "strictfp" => GroovyTokenType::StrictfpKeyword,
340 "super" => GroovyTokenType::SuperKeyword,
341 "switch" => GroovyTokenType::SwitchKeyword,
342 "synchronized" => GroovyTokenType::SynchronizedKeyword,
343 "this" => GroovyTokenType::ThisKeyword,
344 "throw" => GroovyTokenType::ThrowKeyword,
345 "throws" => GroovyTokenType::ThrowsKeyword,
346 "trait" => GroovyTokenType::TraitKeyword,
347 "transient" => GroovyTokenType::TransientKeyword,
348 "try" => GroovyTokenType::TryKeyword,
349 "void" => GroovyTokenType::VoidKeyword,
350 "volatile" => GroovyTokenType::VolatileKeyword,
351 "while" => GroovyTokenType::WhileKeyword,
352
353 "true" | "false" => GroovyTokenType::BooleanLiteral,
355 "null" => GroovyTokenType::NullLiteral,
356
357 _ => GroovyTokenType::Identifier,
359 }
360 }
361
362 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
364 let start = state.get_position();
365
366 if state.consume_if_starts_with(">>>") {
368 state.add_token(GroovyTokenType::UnsignedRightShift, start, state.get_position());
369 return true;
370 }
371 if state.consume_if_starts_with("<=>") {
372 state.add_token(GroovyTokenType::Spaceship, start, state.get_position());
373 return true;
374 }
375
376 if state.consume_if_starts_with("**") {
378 state.add_token(GroovyTokenType::Power, start, state.get_position());
379 return true;
380 }
381 if state.consume_if_starts_with("+=") {
382 state.add_token(GroovyTokenType::PlusAssign, start, state.get_position());
383 return true;
384 }
385 if state.consume_if_starts_with("-=") {
386 state.add_token(GroovyTokenType::MinusAssign, start, state.get_position());
387 return true;
388 }
389 if state.consume_if_starts_with("*=") {
390 state.add_token(GroovyTokenType::StarAssign, start, state.get_position());
391 return true;
392 }
393 if state.consume_if_starts_with("/=") {
394 state.add_token(GroovyTokenType::SlashAssign, start, state.get_position());
395 return true;
396 }
397 if state.consume_if_starts_with("%=") {
398 state.add_token(GroovyTokenType::PercentAssign, start, state.get_position());
399 return true;
400 }
401 if state.consume_if_starts_with("**=") {
402 state.add_token(GroovyTokenType::PowerAssign, start, state.get_position());
403 return true;
404 }
405 if state.consume_if_starts_with("==") {
406 state.add_token(GroovyTokenType::Equal, start, state.get_position());
407 return true;
408 }
409 if state.consume_if_starts_with("!=") {
410 state.add_token(GroovyTokenType::NotEqual, start, state.get_position());
411 return true;
412 }
413 if state.consume_if_starts_with("<=") {
414 state.add_token(GroovyTokenType::LessEqual, start, state.get_position());
415 return true;
416 }
417 if state.consume_if_starts_with(">=") {
418 state.add_token(GroovyTokenType::GreaterEqual, start, state.get_position());
419 return true;
420 }
421 if state.consume_if_starts_with("&&") {
422 state.add_token(GroovyTokenType::LogicalAnd, start, state.get_position());
423 return true;
424 }
425 if state.consume_if_starts_with("||") {
426 state.add_token(GroovyTokenType::LogicalOr, start, state.get_position());
427 return true;
428 }
429 if state.consume_if_starts_with("<<") {
430 state.add_token(GroovyTokenType::LeftShift, start, state.get_position());
431 return true;
432 }
433 if state.consume_if_starts_with(">>") {
434 state.add_token(GroovyTokenType::RightShift, start, state.get_position());
435 return true;
436 }
437 if state.consume_if_starts_with("++") {
438 state.add_token(GroovyTokenType::Increment, start, state.get_position());
439 return true;
440 }
441 if state.consume_if_starts_with("--") {
442 state.add_token(GroovyTokenType::Decrement, start, state.get_position());
443 return true;
444 }
445 if state.consume_if_starts_with("?:") {
446 state.add_token(GroovyTokenType::Elvis, start, state.get_position());
447 return true;
448 }
449 if state.consume_if_starts_with("?.") {
450 state.add_token(GroovyTokenType::SafeNavigation, start, state.get_position());
451 return true;
452 }
453
454 false
455 }
456
457 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
459 if let Some(ch) = state.peek() {
460 let start = state.get_position();
461 let kind = match ch {
462 '+' => Some(GroovyTokenType::Plus),
463 '-' => Some(GroovyTokenType::Minus),
464 '*' => Some(GroovyTokenType::Star),
465 '/' => Some(GroovyTokenType::Slash),
466 '%' => Some(GroovyTokenType::Percent),
467 '=' => Some(GroovyTokenType::Assign),
468 '<' => Some(GroovyTokenType::Less),
469 '>' => Some(GroovyTokenType::Greater),
470 '!' => Some(GroovyTokenType::LogicalNot),
471 '&' => Some(GroovyTokenType::BitAnd),
472 '|' => Some(GroovyTokenType::BitOr),
473 '^' => Some(GroovyTokenType::BitXor),
474 '~' => Some(GroovyTokenType::BitNot),
475 '?' => Some(GroovyTokenType::Question),
476 ':' => Some(GroovyTokenType::Colon),
477 '(' => Some(GroovyTokenType::LeftParen),
478 ')' => Some(GroovyTokenType::RightParen),
479 '[' => Some(GroovyTokenType::LeftBracket),
480 ']' => Some(GroovyTokenType::RightBracket),
481 '{' => Some(GroovyTokenType::LeftBrace),
482 '}' => Some(GroovyTokenType::RightBrace),
483 ',' => Some(GroovyTokenType::Comma),
484 '.' => Some(GroovyTokenType::Period),
485 ';' => Some(GroovyTokenType::Semicolon),
486 '@' => Some(GroovyTokenType::At),
487 _ => None,
488 };
489
490 if let Some(token_kind) = kind {
491 state.advance(ch.len_utf8());
492 let end = state.get_position();
493 state.add_token(token_kind, start, end);
494 true
495 }
496 else {
497 false
498 }
499 }
500 else {
501 false
502 }
503 }
504}