1use crate::{kind::ValaSyntaxKind, language::ValaLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError,
4 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
9type State<'a, S> = LexerState<'a, S, ValaLanguage>;
10
11static VALA_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12static VALA_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: true });
13static VALA_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
14static VALA_CHAR: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
15
16#[derive(Clone, Debug)]
17pub struct ValaLexer<'config> {
18 _config: &'config ValaLanguage,
19}
20
21impl<'config> Lexer<ValaLanguage> for ValaLexer<'config> {
22 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<ValaLanguage>) -> LexOutput<ValaLanguage> {
23 let mut state: State<'_, S> = LexerState::new(source);
24 let result = self.run(&mut state);
25 state.finish_with_cache(result, cache)
26 }
27}
28
29impl<'config> ValaLexer<'config> {
30 pub fn new(config: &'config ValaLanguage) -> Self {
31 Self { _config: config }
32 }
33
34 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
35 while state.not_at_end() {
36 let safe_point = state.get_position();
37
38 if self.skip_whitespace(state) {
39 continue;
40 }
41
42 if self.skip_comment(state) {
43 continue;
44 }
45
46 if self.lex_string_literal(state) {
47 continue;
48 }
49
50 if self.lex_char_literal(state) {
51 continue;
52 }
53
54 if self.lex_number_literal(state) {
55 continue;
56 }
57
58 if self.lex_identifier_or_keyword(state) {
59 continue;
60 }
61
62 if self.lex_operators(state) {
63 continue;
64 }
65
66 if self.lex_single_char_tokens(state) {
67 continue;
68 }
69
70 state.advance_if_dead_lock(safe_point);
71 }
72
73 state.add_eof();
75 Ok(())
76 }
77
78 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
79 VALA_WHITESPACE.scan(state, ValaSyntaxKind::Whitespace)
80 }
81
82 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
83 VALA_COMMENT.scan(state, ValaSyntaxKind::LineComment, ValaSyntaxKind::BlockComment)
84 }
85
86 fn lex_string_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
87 VALA_STRING.scan(state, ValaSyntaxKind::StringLiteral)
88 }
89
90 fn lex_char_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
91 VALA_CHAR.scan(state, ValaSyntaxKind::CharLiteral)
92 }
93
94 fn lex_number_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
95 let start = state.get_position();
96 let first = match state.peek() {
97 Some(c) => c,
98 None => return false,
99 };
100
101 if !first.is_ascii_digit() {
102 return false;
103 }
104
105 let mut is_float = false;
106
107 if first == '0' {
109 match state.peek_next_n(1) {
110 Some('x') | Some('X') => {
111 state.advance(2);
112 while let Some(c) = state.peek() {
113 if c.is_ascii_hexdigit() || c == '_' {
114 state.advance(1);
115 }
116 else {
117 break;
118 }
119 }
120 }
121 Some('b') | Some('B') => {
122 state.advance(2);
123 while let Some(c) = state.peek() {
124 if c == '0' || c == '1' || c == '_' {
125 state.advance(1);
126 }
127 else {
128 break;
129 }
130 }
131 }
132 Some('o') | Some('O') => {
133 state.advance(2);
134 while let Some(c) = state.peek() {
135 if ('0'..='7').contains(&c) || c == '_' {
136 state.advance(1);
137 }
138 else {
139 break;
140 }
141 }
142 }
143 _ => {
144 state.advance(1);
145 while let Some(c) = state.peek() {
146 if c.is_ascii_digit() || c == '_' {
147 state.advance(1);
148 }
149 else {
150 break;
151 }
152 }
153 }
154 }
155 }
156 else {
157 state.advance(1);
158 while let Some(c) = state.peek() {
159 if c.is_ascii_digit() || c == '_' {
160 state.advance(1);
161 }
162 else {
163 break;
164 }
165 }
166 }
167
168 if state.peek() == Some('.') {
170 let n1 = state.peek_next_n(1);
171 if n1.map(|c| c.is_ascii_digit()).unwrap_or(false) {
172 is_float = true;
173 state.advance(1); while let Some(c) = state.peek() {
175 if c.is_ascii_digit() || c == '_' {
176 state.advance(1);
177 }
178 else {
179 break;
180 }
181 }
182 }
183 }
184
185 if let Some(c) = state.peek() {
187 if c == 'e' || c == 'E' {
188 let n1 = state.peek_next_n(1);
189 if n1 == Some('+') || n1 == Some('-') || n1.map(|d| d.is_ascii_digit()).unwrap_or(false) {
190 is_float = true;
191 state.advance(1);
192 if let Some(sign) = state.peek() {
193 if sign == '+' || sign == '-' {
194 state.advance(1);
195 }
196 }
197 while let Some(d) = state.peek() {
198 if d.is_ascii_digit() || d == '_' {
199 state.advance(1);
200 }
201 else {
202 break;
203 }
204 }
205 }
206 }
207 }
208
209 while let Some(c) = state.peek() {
211 if c.is_ascii_alphabetic() {
212 state.advance(1);
213 }
214 else {
215 break;
216 }
217 }
218
219 let end = state.get_position();
220 state.add_token(if is_float { ValaSyntaxKind::FloatLiteral } else { ValaSyntaxKind::IntegerLiteral }, start, end);
221 true
222 }
223
224 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
225 let start = state.get_position();
226 let ch = match state.peek() {
227 Some(c) => c,
228 None => return false,
229 };
230
231 if !(ch.is_ascii_alphabetic() || ch == '_') {
232 return false;
233 }
234
235 state.advance(ch.len_utf8());
236 while let Some(c) = state.peek() {
237 if c.is_ascii_alphanumeric() || c == '_' {
238 state.advance(c.len_utf8());
239 }
240 else {
241 break;
242 }
243 }
244
245 let end = state.get_position();
246 let text = state.get_text_in(oak_core::Range { start, end });
247 let kind = match text.as_ref() {
248 "abstract" => ValaSyntaxKind::AbstractKw,
249 "as" => ValaSyntaxKind::AsKw,
250 "base" => ValaSyntaxKind::BaseKw,
251 "break" => ValaSyntaxKind::BreakKw,
252 "case" => ValaSyntaxKind::CaseKw,
253 "catch" => ValaSyntaxKind::CatchKw,
254 "class" => ValaSyntaxKind::ClassKw,
255 "const" => ValaSyntaxKind::ConstKw,
256 "construct" => ValaSyntaxKind::ConstructKw,
257 "continue" => ValaSyntaxKind::ContinueKw,
258 "default" => ValaSyntaxKind::DefaultKw,
259 "delegate" => ValaSyntaxKind::DelegateKw,
260 "delete" => ValaSyntaxKind::DeleteKw,
261 "do" => ValaSyntaxKind::DoKw,
262 "else" => ValaSyntaxKind::ElseKw,
263 "enum" => ValaSyntaxKind::EnumKw,
264 "ensures" => ValaSyntaxKind::EnsuresKw,
265 "errordomain" => ValaSyntaxKind::ErrordomainKw,
266 "extern" => ValaSyntaxKind::ExternKw,
267 "false" => ValaSyntaxKind::FalseKw,
268 "finally" => ValaSyntaxKind::FinallyKw,
269 "for" => ValaSyntaxKind::ForKw,
270 "foreach" => ValaSyntaxKind::ForeachKw,
271 "get" => ValaSyntaxKind::GetKw,
272 "if" => ValaSyntaxKind::IfKw,
273 "in" => ValaSyntaxKind::InKw,
274 "inline" => ValaSyntaxKind::InlineKw,
275 "interface" => ValaSyntaxKind::InterfaceKw,
276 "internal" => ValaSyntaxKind::InternalKw,
277 "is" => ValaSyntaxKind::IsKw,
278 "lock" => ValaSyntaxKind::LockKw,
279 "namespace" => ValaSyntaxKind::NamespaceKw,
280 "new" => ValaSyntaxKind::NewKw,
281 "null" => ValaSyntaxKind::NullKw,
282 "out" => ValaSyntaxKind::OutKw,
283 "override" => ValaSyntaxKind::OverrideKw,
284 "owned" => ValaSyntaxKind::OwnedKw,
285 "private" => ValaSyntaxKind::PrivateKw,
286 "protected" => ValaSyntaxKind::ProtectedKw,
287 "public" => ValaSyntaxKind::PublicKw,
288 "ref" => ValaSyntaxKind::RefKw,
289 "requires" => ValaSyntaxKind::RequiresKw,
290 "return" => ValaSyntaxKind::ReturnKw,
291 "set" => ValaSyntaxKind::SetKw,
292 "sizeof" => ValaSyntaxKind::SizeofKw,
293 "static" => ValaSyntaxKind::StaticKw,
294 "struct" => ValaSyntaxKind::StructKw,
295 "switch" => ValaSyntaxKind::SwitchKw,
296 "this" => ValaSyntaxKind::ThisKw,
297 "throw" => ValaSyntaxKind::ThrowKw,
298 "throws" => ValaSyntaxKind::ThrowsKw,
299 "true" => ValaSyntaxKind::TrueKw,
300 "try" => ValaSyntaxKind::TryKw,
301 "typeof" => ValaSyntaxKind::TypeofKw,
302 "unowned" => ValaSyntaxKind::UnownedKw,
303 "using" => ValaSyntaxKind::UsingKw,
304 "var" => ValaSyntaxKind::VarKw,
305 "virtual" => ValaSyntaxKind::VirtualKw,
306 "void" => ValaSyntaxKind::VoidKw,
307 "volatile" => ValaSyntaxKind::VolatileKw,
308 "weak" => ValaSyntaxKind::WeakKw,
309 "while" => ValaSyntaxKind::WhileKw,
310 "yield" => ValaSyntaxKind::YieldKw,
311 "bool" => ValaSyntaxKind::BoolKw,
313 "char" => ValaSyntaxKind::CharKw,
314 "uchar" => ValaSyntaxKind::UcharKw,
315 "int" => ValaSyntaxKind::IntKw,
316 "uint" => ValaSyntaxKind::UintKw,
317 "short" => ValaSyntaxKind::ShortKw,
318 "ushort" => ValaSyntaxKind::UshortKw,
319 "long" => ValaSyntaxKind::LongKw,
320 "ulong" => ValaSyntaxKind::UlongKw,
321 "int8" => ValaSyntaxKind::Int8Kw,
322 "uint8" => ValaSyntaxKind::Uint8Kw,
323 "int16" => ValaSyntaxKind::Int16Kw,
324 "uint16" => ValaSyntaxKind::Uint16Kw,
325 "int32" => ValaSyntaxKind::Int32Kw,
326 "uint32" => ValaSyntaxKind::Uint32Kw,
327 "int64" => ValaSyntaxKind::Int64Kw,
328 "uint64" => ValaSyntaxKind::Uint64Kw,
329 "float" => ValaSyntaxKind::FloatKw,
330 "double" => ValaSyntaxKind::DoubleKw,
331 "string" => ValaSyntaxKind::StringKw,
332 _ => ValaSyntaxKind::Identifier,
333 };
334
335 state.add_token(kind, start, state.get_position());
336 true
337 }
338
339 fn lex_operators<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
340 let start = state.get_position();
341
342 let patterns: &[(&str, ValaSyntaxKind)] = &[
344 ("<<", ValaSyntaxKind::LeftShift),
345 (">>", ValaSyntaxKind::RightShift),
346 ("==", ValaSyntaxKind::EqEq),
347 ("!=", ValaSyntaxKind::NotEq),
348 ("<=", ValaSyntaxKind::LessEq),
349 (">=", ValaSyntaxKind::GreaterEq),
350 ("&&", ValaSyntaxKind::AndAnd),
351 ("||", ValaSyntaxKind::OrOr),
352 ("++", ValaSyntaxKind::PlusPlus),
353 ("--", ValaSyntaxKind::MinusMinus),
354 ("+=", ValaSyntaxKind::PlusEq),
355 ("-=", ValaSyntaxKind::MinusEq),
356 ("*=", ValaSyntaxKind::StarEq),
357 ("/=", ValaSyntaxKind::SlashEq),
358 ("%=", ValaSyntaxKind::PercentEq),
359 ("->", ValaSyntaxKind::Arrow),
360 ];
361
362 for (pat, kind) in patterns {
363 if state.starts_with(pat) {
364 state.advance(pat.len());
365 state.add_token(*kind, start, state.get_position());
366 return true;
367 }
368 }
369
370 if let Some(ch) = state.current() {
371 let kind = match ch {
372 '+' => Some(ValaSyntaxKind::Plus),
373 '-' => Some(ValaSyntaxKind::Minus),
374 '*' => Some(ValaSyntaxKind::Star),
375 '/' => Some(ValaSyntaxKind::Slash),
376 '%' => Some(ValaSyntaxKind::Percent),
377 '^' => Some(ValaSyntaxKind::Caret),
378 '!' => Some(ValaSyntaxKind::Bang),
379 '&' => Some(ValaSyntaxKind::Ampersand),
380 '|' => Some(ValaSyntaxKind::Pipe),
381 '=' => Some(ValaSyntaxKind::Eq),
382 '>' => Some(ValaSyntaxKind::GreaterThan),
383 '<' => Some(ValaSyntaxKind::LessThan),
384 '.' => Some(ValaSyntaxKind::Dot),
385 ':' => Some(ValaSyntaxKind::Colon),
386 '?' => Some(ValaSyntaxKind::Question),
387 '~' => Some(ValaSyntaxKind::Tilde),
388 '\\' => Some(ValaSyntaxKind::Backslash),
389 '@' => Some(ValaSyntaxKind::At),
390 '#' => Some(ValaSyntaxKind::Hash),
391 '$' => Some(ValaSyntaxKind::Dollar),
392 _ => None,
393 };
394
395 if let Some(k) = kind {
396 state.advance(ch.len_utf8());
397 state.add_token(k, start, state.get_position());
398 return true;
399 }
400 }
401
402 false
403 }
404
405 fn lex_single_char_tokens<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
406 let start = state.get_position();
407 if let Some(ch) = state.current() {
408 let kind = match ch {
409 '(' => Some(ValaSyntaxKind::LeftParen),
410 ')' => Some(ValaSyntaxKind::RightParen),
411 '{' => Some(ValaSyntaxKind::LeftBrace),
412 '}' => Some(ValaSyntaxKind::RightBrace),
413 '[' => Some(ValaSyntaxKind::LeftBracket),
414 ']' => Some(ValaSyntaxKind::RightBracket),
415 ',' => Some(ValaSyntaxKind::Comma),
416 ';' => Some(ValaSyntaxKind::Semicolon),
417 _ => None,
418 };
419
420 if let Some(k) = kind {
421 state.advance(ch.len_utf8());
422 state.add_token(k, start, state.get_position());
423 return true;
424 }
425 }
426 false
427 }
428}