1#![doc = include_str!("readme.md")]
2use oak_core::Source;
3pub mod token_type;
5
6use crate::{language::ValaLanguage, lexer::token_type::ValaTokenType};
7use oak_core::{
8 Lexer, LexerCache, LexerState, OakError,
9 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
10};
11use std::sync::LazyLock;
12
13pub(crate) type State<'a, S> = LexerState<'a, S, ValaLanguage>;
14
15static VALA_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
16static VALA_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: true });
17static VALA_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
18static VALA_CHAR: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
19
20#[derive(Clone, Debug)]
22pub struct ValaLexer<'config> {
23 config: &'config ValaLanguage,
24}
25
26impl<'config> Lexer<ValaLanguage> for ValaLexer<'config> {
27 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<ValaLanguage>) -> LexOutput<ValaLanguage> {
28 let mut state: State<'_, S> = LexerState::new(source);
29 let result = self.run(&mut state);
30 state.finish_with_cache(result, cache)
31 }
32}
33
34impl<'config> ValaLexer<'config> {
35 pub fn new(config: &'config ValaLanguage) -> Self {
37 Self { config }
38 }
39
40 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
41 while state.not_at_end() {
42 let safe_point = state.get_position();
43
44 if self.skip_whitespace(state) {
45 continue;
46 }
47
48 if self.skip_comment(state) {
49 continue;
50 }
51
52 if self.lex_string_literal(state) {
53 continue;
54 }
55
56 if self.lex_char_literal(state) {
57 continue;
58 }
59
60 if self.lex_number_literal(state) {
61 continue;
62 }
63
64 if self.lex_identifier_or_keyword(state) {
65 continue;
66 }
67
68 if self.lex_operators(state) {
69 continue;
70 }
71
72 if self.lex_single_char_tokens(state) {
73 continue;
74 }
75
76 state.advance_if_dead_lock(safe_point);
77 }
78
79 state.add_eof();
81 Ok(())
82 }
83
84 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
85 VALA_WHITESPACE.scan(state, ValaTokenType::Whitespace)
86 }
87
88 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
89 VALA_COMMENT.scan(state, ValaTokenType::LineComment, ValaTokenType::BlockComment)
90 }
91
92 fn lex_string_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
93 VALA_STRING.scan(state, ValaTokenType::StringLiteral)
94 }
95
96 fn lex_char_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
97 VALA_CHAR.scan(state, ValaTokenType::CharLiteral)
98 }
99
100 fn lex_number_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
101 let start = state.get_position();
102 let first = match state.peek() {
103 Some(c) => c,
104 None => return false,
105 };
106
107 if !first.is_ascii_digit() {
108 return false;
109 }
110
111 let mut is_float = false;
112
113 if first == '0' {
115 match state.peek_next_n(1) {
116 Some('x') | Some('X') => {
117 state.advance(2);
118 while let Some(c) = state.peek() {
119 if c.is_ascii_hexdigit() || c == '_' {
120 state.advance(1);
121 }
122 else {
123 break;
124 }
125 }
126 }
127 Some('b') | Some('B') => {
128 state.advance(2);
129 while let Some(c) = state.peek() {
130 if c == '0' || c == '1' || c == '_' {
131 state.advance(1);
132 }
133 else {
134 break;
135 }
136 }
137 }
138 Some('o') | Some('O') => {
139 state.advance(2);
140 while let Some(c) = state.peek() {
141 if ('0'..='7').contains(&c) || c == '_' {
142 state.advance(1);
143 }
144 else {
145 break;
146 }
147 }
148 }
149 _ => {
150 state.advance(1);
151 while let Some(c) = state.peek() {
152 if c.is_ascii_digit() || c == '_' {
153 state.advance(1);
154 }
155 else {
156 break;
157 }
158 }
159 }
160 }
161 }
162 else {
163 state.advance(1);
164 while let Some(c) = state.peek() {
165 if c.is_ascii_digit() || c == '_' {
166 state.advance(1);
167 }
168 else {
169 break;
170 }
171 }
172 }
173
174 if state.peek() == Some('.') {
176 let n1 = state.peek_next_n(1);
177 if n1.map(|c| c.is_ascii_digit()).unwrap_or(false) {
178 is_float = true;
179 state.advance(1); while let Some(c) = state.peek() {
181 if c.is_ascii_digit() || c == '_' {
182 state.advance(1);
183 }
184 else {
185 break;
186 }
187 }
188 }
189 }
190
191 if let Some(c) = state.peek() {
193 if c == 'e' || c == 'E' {
194 let n1 = state.peek_next_n(1);
195 if n1 == Some('+') || n1 == Some('-') || n1.map(|d| d.is_ascii_digit()).unwrap_or(false) {
196 is_float = true;
197 state.advance(1);
198 if let Some(sign) = state.peek() {
199 if sign == '+' || sign == '-' {
200 state.advance(1);
201 }
202 }
203 while let Some(d) = state.peek() {
204 if d.is_ascii_digit() || d == '_' {
205 state.advance(1);
206 }
207 else {
208 break;
209 }
210 }
211 }
212 }
213 }
214
215 while let Some(c) = state.peek() {
217 if c.is_ascii_alphabetic() {
218 state.advance(1);
219 }
220 else {
221 break;
222 }
223 }
224
225 let end = state.get_position();
226 state.add_token(if is_float { ValaTokenType::FloatLiteral } else { ValaTokenType::IntegerLiteral }, start, end);
227 true
228 }
229
230 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
231 let start = state.get_position();
232 let ch = match state.peek() {
233 Some(c) => c,
234 None => return false,
235 };
236
237 if !(ch.is_ascii_alphabetic() || ch == '_') {
238 return false;
239 }
240
241 state.advance(ch.len_utf8());
242 while let Some(c) = state.peek() {
243 if c.is_ascii_alphanumeric() || c == '_' {
244 state.advance(c.len_utf8());
245 }
246 else {
247 break;
248 }
249 }
250
251 let end = state.get_position();
252 let text = state.get_text_in(oak_core::Range { start, end });
253 let kind = match text.as_ref() {
254 "abstract" => ValaTokenType::AbstractKw,
255 "as" => ValaTokenType::AsKw,
256 "base" => ValaTokenType::BaseKw,
257 "break" => ValaTokenType::BreakKw,
258 "case" => ValaTokenType::CaseKw,
259 "catch" => ValaTokenType::CatchKw,
260 "class" => ValaTokenType::ClassKw,
261 "const" => ValaTokenType::ConstKw,
262 "construct" => ValaTokenType::ConstructKw,
263 "continue" => ValaTokenType::ContinueKw,
264 "default" => ValaTokenType::DefaultKw,
265 "delegate" => ValaTokenType::DelegateKw,
266 "delete" => ValaTokenType::DeleteKw,
267 "do" => ValaTokenType::DoKw,
268 "else" => ValaTokenType::ElseKw,
269 "enum" => ValaTokenType::EnumKw,
270 "ensures" => ValaTokenType::EnsuresKw,
271 "errordomain" => ValaTokenType::ErrordomainKw,
272 "extern" => ValaTokenType::ExternKw,
273 "false" => ValaTokenType::FalseKw,
274 "finally" => ValaTokenType::FinallyKw,
275 "for" => ValaTokenType::ForKw,
276 "foreach" => ValaTokenType::ForeachKw,
277 "get" => ValaTokenType::GetKw,
278 "if" => ValaTokenType::IfKw,
279 "in" => ValaTokenType::InKw,
280 "inline" => ValaTokenType::InlineKw,
281 "interface" => ValaTokenType::InterfaceKw,
282 "internal" => ValaTokenType::InternalKw,
283 "is" => ValaTokenType::IsKw,
284 "lock" => ValaTokenType::LockKw,
285 "namespace" => ValaTokenType::NamespaceKw,
286 "new" => ValaTokenType::NewKw,
287 "null" => ValaTokenType::NullKw,
288 "out" => ValaTokenType::OutKw,
289 "override" => ValaTokenType::OverrideKw,
290 "owned" => ValaTokenType::OwnedKw,
291 "private" => ValaTokenType::PrivateKw,
292 "protected" => ValaTokenType::ProtectedKw,
293 "public" => ValaTokenType::PublicKw,
294 "ref" => ValaTokenType::RefKw,
295 "requires" => ValaTokenType::RequiresKw,
296 "return" => ValaTokenType::ReturnKw,
297 "set" => ValaTokenType::SetKw,
298 "sizeof" => ValaTokenType::SizeofKw,
299 "static" => ValaTokenType::StaticKw,
300 "struct" => ValaTokenType::StructKw,
301 "switch" => ValaTokenType::SwitchKw,
302 "this" => ValaTokenType::ThisKw,
303 "throw" => ValaTokenType::ThrowKw,
304 "throws" => ValaTokenType::ThrowsKw,
305 "true" => ValaTokenType::TrueKw,
306 "try" => ValaTokenType::TryKw,
307 "typeof" => ValaTokenType::TypeofKw,
308 "unowned" => ValaTokenType::UnownedKw,
309 "using" => ValaTokenType::UsingKw,
310 "var" => ValaTokenType::VarKw,
311 "virtual" => ValaTokenType::VirtualKw,
312 "void" => ValaTokenType::VoidKw,
313 "volatile" => ValaTokenType::VolatileKw,
314 "weak" => ValaTokenType::WeakKw,
315 "while" => ValaTokenType::WhileKw,
316 "yield" => ValaTokenType::YieldKw,
317 "bool" => ValaTokenType::BoolKw,
319 "char" => ValaTokenType::CharKw,
320 "uchar" => ValaTokenType::UcharKw,
321 "int" => ValaTokenType::IntKw,
322 "uint" => ValaTokenType::UintKw,
323 "short" => ValaTokenType::ShortKw,
324 "ushort" => ValaTokenType::UshortKw,
325 "long" => ValaTokenType::LongKw,
326 "ulong" => ValaTokenType::UlongKw,
327 "int8" => ValaTokenType::Int8Kw,
328 "uint8" => ValaTokenType::Uint8Kw,
329 "int16" => ValaTokenType::Int16Kw,
330 "uint16" => ValaTokenType::Uint16Kw,
331 "int32" => ValaTokenType::Int32Kw,
332 "uint32" => ValaTokenType::Uint32Kw,
333 "int64" => ValaTokenType::Int64Kw,
334 "uint64" => ValaTokenType::Uint64Kw,
335 "float" => ValaTokenType::FloatKw,
336 "double" => ValaTokenType::DoubleKw,
337 "string" => ValaTokenType::StringKw,
338 _ => ValaTokenType::Identifier,
339 };
340
341 state.add_token(kind, start, state.get_position());
342 true
343 }
344
345 fn lex_operators<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
346 let start = state.get_position();
347
348 let patterns: &[(&str, ValaTokenType)] = &[
350 ("<<", ValaTokenType::LeftShift),
351 (">>", ValaTokenType::RightShift),
352 ("==", ValaTokenType::EqEq),
353 ("!=", ValaTokenType::NotEq),
354 ("<=", ValaTokenType::LessEq),
355 (">=", ValaTokenType::GreaterEq),
356 ("&&", ValaTokenType::AndAnd),
357 ("||", ValaTokenType::OrOr),
358 ("++", ValaTokenType::PlusPlus),
359 ("--", ValaTokenType::MinusMinus),
360 ("+=", ValaTokenType::PlusEq),
361 ("-=", ValaTokenType::MinusEq),
362 ("*=", ValaTokenType::StarEq),
363 ("/=", ValaTokenType::SlashEq),
364 ("%=", ValaTokenType::PercentEq),
365 ("->", ValaTokenType::Arrow),
366 ];
367
368 for (pat, kind) in patterns {
369 if state.starts_with(pat) {
370 state.advance(pat.len());
371 state.add_token(*kind, start, state.get_position());
372 return true;
373 }
374 }
375
376 if let Some(ch) = state.current() {
377 let kind = match ch {
378 '+' => Some(ValaTokenType::Plus),
379 '-' => Some(ValaTokenType::Minus),
380 '*' => Some(ValaTokenType::Star),
381 '/' => Some(ValaTokenType::Slash),
382 '%' => Some(ValaTokenType::Percent),
383 '^' => Some(ValaTokenType::Caret),
384 '!' => Some(ValaTokenType::Bang),
385 '&' => Some(ValaTokenType::Ampersand),
386 '|' => Some(ValaTokenType::Pipe),
387 '=' => Some(ValaTokenType::Eq),
388 '>' => Some(ValaTokenType::GreaterThan),
389 '<' => Some(ValaTokenType::LessThan),
390 '.' => Some(ValaTokenType::Dot),
391 ':' => Some(ValaTokenType::Colon),
392 '?' => Some(ValaTokenType::Question),
393 '~' => Some(ValaTokenType::Tilde),
394 '\\' => Some(ValaTokenType::Backslash),
395 '@' => Some(ValaTokenType::At),
396 '#' => Some(ValaTokenType::Hash),
397 '$' => Some(ValaTokenType::Dollar),
398 _ => None,
399 };
400
401 if let Some(k) = kind {
402 state.advance(ch.len_utf8());
403 state.add_token(k, start, state.get_position());
404 return true;
405 }
406 }
407
408 false
409 }
410
411 fn lex_single_char_tokens<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
412 let start = state.get_position();
413 if let Some(ch) = state.current() {
414 let kind = match ch {
415 '(' => Some(ValaTokenType::LeftParen),
416 ')' => Some(ValaTokenType::RightParen),
417 '{' => Some(ValaTokenType::LeftBrace),
418 '}' => Some(ValaTokenType::RightBrace),
419 '[' => Some(ValaTokenType::LeftBracket),
420 ']' => Some(ValaTokenType::RightBracket),
421 ',' => Some(ValaTokenType::Comma),
422 ';' => Some(ValaTokenType::Semicolon),
423 _ => None,
424 };
425
426 if let Some(k) = kind {
427 state.advance(ch.len_utf8());
428 state.add_token(k, start, state.get_position());
429 return true;
430 }
431 }
432 false
433 }
434}