1#![doc = include_str!("readme.md")]
2use oak_core::Source;
3pub mod token_type;
4
5use crate::{language::ValaLanguage, lexer::token_type::ValaTokenType};
6use oak_core::{
7 Lexer, LexerCache, LexerState, OakError,
8 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
9};
10use std::sync::LazyLock;
11
12type State<'a, S> = LexerState<'a, S, ValaLanguage>;
13
14static VALA_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
15static VALA_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: true });
16static VALA_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
17static VALA_CHAR: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
18
/// Lexer for Vala source text.
///
/// Holds a borrowed [`ValaLanguage`] configuration. None of the lexing methods
/// in this file read it, hence the leading underscore on the field.
#[derive(Clone, Debug)]
pub struct ValaLexer<'config> {
    /// Language configuration supplied to [`ValaLexer::new`]; currently unused.
    _config: &'config ValaLanguage,
}
23
24impl<'config> Lexer<ValaLanguage> for ValaLexer<'config> {
25 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<ValaLanguage>) -> LexOutput<ValaLanguage> {
26 let mut state: State<'_, S> = LexerState::new(source);
27 let result = self.run(&mut state);
28 state.finish_with_cache(result, cache)
29 }
30}
31
32impl<'config> ValaLexer<'config> {
    /// Creates a lexer that borrows `config` for the `'config` lifetime.
    pub fn new(config: &'config ValaLanguage) -> Self {
        Self { _config: config }
    }
36
37 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
38 while state.not_at_end() {
39 let safe_point = state.get_position();
40
41 if self.skip_whitespace(state) {
42 continue;
43 }
44
45 if self.skip_comment(state) {
46 continue;
47 }
48
49 if self.lex_string_literal(state) {
50 continue;
51 }
52
53 if self.lex_char_literal(state) {
54 continue;
55 }
56
57 if self.lex_number_literal(state) {
58 continue;
59 }
60
61 if self.lex_identifier_or_keyword(state) {
62 continue;
63 }
64
65 if self.lex_operators(state) {
66 continue;
67 }
68
69 if self.lex_single_char_tokens(state) {
70 continue;
71 }
72
73 state.advance_if_dead_lock(safe_point);
74 }
75
76 state.add_eof();
78 Ok(())
79 }
80
    /// Runs the [`VALA_WHITESPACE`] scanner at the current position, tagging any
    /// match as `ValaTokenType::Whitespace`, and returns the scanner's result.
    fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
        VALA_WHITESPACE.scan(state, ValaTokenType::Whitespace)
    }
84
    /// Runs the [`VALA_COMMENT`] scanner at the current position, passing
    /// `LineComment` and `BlockComment` as the token kinds for the two comment
    /// forms, and returns the scanner's result.
    fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
        VALA_COMMENT.scan(state, ValaTokenType::LineComment, ValaTokenType::BlockComment)
    }
88
    /// Runs the [`VALA_STRING`] scanner (double-quoted, backslash escapes) at the
    /// current position, tagging any match as `ValaTokenType::StringLiteral`,
    /// and returns the scanner's result.
    fn lex_string_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
        VALA_STRING.scan(state, ValaTokenType::StringLiteral)
    }
92
    /// Runs the [`VALA_CHAR`] scanner (single-quoted, backslash escapes) at the
    /// current position, tagging any match as `ValaTokenType::CharLiteral`,
    /// and returns the scanner's result.
    fn lex_char_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
        VALA_CHAR.scan(state, ValaTokenType::CharLiteral)
    }
96
97 fn lex_number_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
98 let start = state.get_position();
99 let first = match state.peek() {
100 Some(c) => c,
101 None => return false,
102 };
103
104 if !first.is_ascii_digit() {
105 return false;
106 }
107
108 let mut is_float = false;
109
110 if first == '0' {
112 match state.peek_next_n(1) {
113 Some('x') | Some('X') => {
114 state.advance(2);
115 while let Some(c) = state.peek() {
116 if c.is_ascii_hexdigit() || c == '_' {
117 state.advance(1);
118 }
119 else {
120 break;
121 }
122 }
123 }
124 Some('b') | Some('B') => {
125 state.advance(2);
126 while let Some(c) = state.peek() {
127 if c == '0' || c == '1' || c == '_' {
128 state.advance(1);
129 }
130 else {
131 break;
132 }
133 }
134 }
135 Some('o') | Some('O') => {
136 state.advance(2);
137 while let Some(c) = state.peek() {
138 if ('0'..='7').contains(&c) || c == '_' {
139 state.advance(1);
140 }
141 else {
142 break;
143 }
144 }
145 }
146 _ => {
147 state.advance(1);
148 while let Some(c) = state.peek() {
149 if c.is_ascii_digit() || c == '_' {
150 state.advance(1);
151 }
152 else {
153 break;
154 }
155 }
156 }
157 }
158 }
159 else {
160 state.advance(1);
161 while let Some(c) = state.peek() {
162 if c.is_ascii_digit() || c == '_' {
163 state.advance(1);
164 }
165 else {
166 break;
167 }
168 }
169 }
170
171 if state.peek() == Some('.') {
173 let n1 = state.peek_next_n(1);
174 if n1.map(|c| c.is_ascii_digit()).unwrap_or(false) {
175 is_float = true;
176 state.advance(1); while let Some(c) = state.peek() {
178 if c.is_ascii_digit() || c == '_' {
179 state.advance(1);
180 }
181 else {
182 break;
183 }
184 }
185 }
186 }
187
188 if let Some(c) = state.peek() {
190 if c == 'e' || c == 'E' {
191 let n1 = state.peek_next_n(1);
192 if n1 == Some('+') || n1 == Some('-') || n1.map(|d| d.is_ascii_digit()).unwrap_or(false) {
193 is_float = true;
194 state.advance(1);
195 if let Some(sign) = state.peek() {
196 if sign == '+' || sign == '-' {
197 state.advance(1);
198 }
199 }
200 while let Some(d) = state.peek() {
201 if d.is_ascii_digit() || d == '_' {
202 state.advance(1);
203 }
204 else {
205 break;
206 }
207 }
208 }
209 }
210 }
211
212 while let Some(c) = state.peek() {
214 if c.is_ascii_alphabetic() {
215 state.advance(1);
216 }
217 else {
218 break;
219 }
220 }
221
222 let end = state.get_position();
223 state.add_token(if is_float { ValaTokenType::FloatLiteral } else { ValaTokenType::IntegerLiteral }, start, end);
224 true
225 }
226
227 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
228 let start = state.get_position();
229 let ch = match state.peek() {
230 Some(c) => c,
231 None => return false,
232 };
233
234 if !(ch.is_ascii_alphabetic() || ch == '_') {
235 return false;
236 }
237
238 state.advance(ch.len_utf8());
239 while let Some(c) = state.peek() {
240 if c.is_ascii_alphanumeric() || c == '_' {
241 state.advance(c.len_utf8());
242 }
243 else {
244 break;
245 }
246 }
247
248 let end = state.get_position();
249 let text = state.get_text_in(oak_core::Range { start, end });
250 let kind = match text.as_ref() {
251 "abstract" => ValaTokenType::AbstractKw,
252 "as" => ValaTokenType::AsKw,
253 "base" => ValaTokenType::BaseKw,
254 "break" => ValaTokenType::BreakKw,
255 "case" => ValaTokenType::CaseKw,
256 "catch" => ValaTokenType::CatchKw,
257 "class" => ValaTokenType::ClassKw,
258 "const" => ValaTokenType::ConstKw,
259 "construct" => ValaTokenType::ConstructKw,
260 "continue" => ValaTokenType::ContinueKw,
261 "default" => ValaTokenType::DefaultKw,
262 "delegate" => ValaTokenType::DelegateKw,
263 "delete" => ValaTokenType::DeleteKw,
264 "do" => ValaTokenType::DoKw,
265 "else" => ValaTokenType::ElseKw,
266 "enum" => ValaTokenType::EnumKw,
267 "ensures" => ValaTokenType::EnsuresKw,
268 "errordomain" => ValaTokenType::ErrordomainKw,
269 "extern" => ValaTokenType::ExternKw,
270 "false" => ValaTokenType::FalseKw,
271 "finally" => ValaTokenType::FinallyKw,
272 "for" => ValaTokenType::ForKw,
273 "foreach" => ValaTokenType::ForeachKw,
274 "get" => ValaTokenType::GetKw,
275 "if" => ValaTokenType::IfKw,
276 "in" => ValaTokenType::InKw,
277 "inline" => ValaTokenType::InlineKw,
278 "interface" => ValaTokenType::InterfaceKw,
279 "internal" => ValaTokenType::InternalKw,
280 "is" => ValaTokenType::IsKw,
281 "lock" => ValaTokenType::LockKw,
282 "namespace" => ValaTokenType::NamespaceKw,
283 "new" => ValaTokenType::NewKw,
284 "null" => ValaTokenType::NullKw,
285 "out" => ValaTokenType::OutKw,
286 "override" => ValaTokenType::OverrideKw,
287 "owned" => ValaTokenType::OwnedKw,
288 "private" => ValaTokenType::PrivateKw,
289 "protected" => ValaTokenType::ProtectedKw,
290 "public" => ValaTokenType::PublicKw,
291 "ref" => ValaTokenType::RefKw,
292 "requires" => ValaTokenType::RequiresKw,
293 "return" => ValaTokenType::ReturnKw,
294 "set" => ValaTokenType::SetKw,
295 "sizeof" => ValaTokenType::SizeofKw,
296 "static" => ValaTokenType::StaticKw,
297 "struct" => ValaTokenType::StructKw,
298 "switch" => ValaTokenType::SwitchKw,
299 "this" => ValaTokenType::ThisKw,
300 "throw" => ValaTokenType::ThrowKw,
301 "throws" => ValaTokenType::ThrowsKw,
302 "true" => ValaTokenType::TrueKw,
303 "try" => ValaTokenType::TryKw,
304 "typeof" => ValaTokenType::TypeofKw,
305 "unowned" => ValaTokenType::UnownedKw,
306 "using" => ValaTokenType::UsingKw,
307 "var" => ValaTokenType::VarKw,
308 "virtual" => ValaTokenType::VirtualKw,
309 "void" => ValaTokenType::VoidKw,
310 "volatile" => ValaTokenType::VolatileKw,
311 "weak" => ValaTokenType::WeakKw,
312 "while" => ValaTokenType::WhileKw,
313 "yield" => ValaTokenType::YieldKw,
314 "bool" => ValaTokenType::BoolKw,
316 "char" => ValaTokenType::CharKw,
317 "uchar" => ValaTokenType::UcharKw,
318 "int" => ValaTokenType::IntKw,
319 "uint" => ValaTokenType::UintKw,
320 "short" => ValaTokenType::ShortKw,
321 "ushort" => ValaTokenType::UshortKw,
322 "long" => ValaTokenType::LongKw,
323 "ulong" => ValaTokenType::UlongKw,
324 "int8" => ValaTokenType::Int8Kw,
325 "uint8" => ValaTokenType::Uint8Kw,
326 "int16" => ValaTokenType::Int16Kw,
327 "uint16" => ValaTokenType::Uint16Kw,
328 "int32" => ValaTokenType::Int32Kw,
329 "uint32" => ValaTokenType::Uint32Kw,
330 "int64" => ValaTokenType::Int64Kw,
331 "uint64" => ValaTokenType::Uint64Kw,
332 "float" => ValaTokenType::FloatKw,
333 "double" => ValaTokenType::DoubleKw,
334 "string" => ValaTokenType::StringKw,
335 _ => ValaTokenType::Identifier,
336 };
337
338 state.add_token(kind, start, state.get_position());
339 true
340 }
341
342 fn lex_operators<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
343 let start = state.get_position();
344
345 let patterns: &[(&str, ValaTokenType)] = &[
347 ("<<", ValaTokenType::LeftShift),
348 (">>", ValaTokenType::RightShift),
349 ("==", ValaTokenType::EqEq),
350 ("!=", ValaTokenType::NotEq),
351 ("<=", ValaTokenType::LessEq),
352 (">=", ValaTokenType::GreaterEq),
353 ("&&", ValaTokenType::AndAnd),
354 ("||", ValaTokenType::OrOr),
355 ("++", ValaTokenType::PlusPlus),
356 ("--", ValaTokenType::MinusMinus),
357 ("+=", ValaTokenType::PlusEq),
358 ("-=", ValaTokenType::MinusEq),
359 ("*=", ValaTokenType::StarEq),
360 ("/=", ValaTokenType::SlashEq),
361 ("%=", ValaTokenType::PercentEq),
362 ("->", ValaTokenType::Arrow),
363 ];
364
365 for (pat, kind) in patterns {
366 if state.starts_with(pat) {
367 state.advance(pat.len());
368 state.add_token(*kind, start, state.get_position());
369 return true;
370 }
371 }
372
373 if let Some(ch) = state.current() {
374 let kind = match ch {
375 '+' => Some(ValaTokenType::Plus),
376 '-' => Some(ValaTokenType::Minus),
377 '*' => Some(ValaTokenType::Star),
378 '/' => Some(ValaTokenType::Slash),
379 '%' => Some(ValaTokenType::Percent),
380 '^' => Some(ValaTokenType::Caret),
381 '!' => Some(ValaTokenType::Bang),
382 '&' => Some(ValaTokenType::Ampersand),
383 '|' => Some(ValaTokenType::Pipe),
384 '=' => Some(ValaTokenType::Eq),
385 '>' => Some(ValaTokenType::GreaterThan),
386 '<' => Some(ValaTokenType::LessThan),
387 '.' => Some(ValaTokenType::Dot),
388 ':' => Some(ValaTokenType::Colon),
389 '?' => Some(ValaTokenType::Question),
390 '~' => Some(ValaTokenType::Tilde),
391 '\\' => Some(ValaTokenType::Backslash),
392 '@' => Some(ValaTokenType::At),
393 '#' => Some(ValaTokenType::Hash),
394 '$' => Some(ValaTokenType::Dollar),
395 _ => None,
396 };
397
398 if let Some(k) = kind {
399 state.advance(ch.len_utf8());
400 state.add_token(k, start, state.get_position());
401 return true;
402 }
403 }
404
405 false
406 }
407
408 fn lex_single_char_tokens<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
409 let start = state.get_position();
410 if let Some(ch) = state.current() {
411 let kind = match ch {
412 '(' => Some(ValaTokenType::LeftParen),
413 ')' => Some(ValaTokenType::RightParen),
414 '{' => Some(ValaTokenType::LeftBrace),
415 '}' => Some(ValaTokenType::RightBrace),
416 '[' => Some(ValaTokenType::LeftBracket),
417 ']' => Some(ValaTokenType::RightBracket),
418 ',' => Some(ValaTokenType::Comma),
419 ';' => Some(ValaTokenType::Semicolon),
420 _ => None,
421 };
422
423 if let Some(k) = kind {
424 state.advance(ch.len_utf8());
425 state.add_token(k, start, state.get_position());
426 return true;
427 }
428 }
429 false
430 }
431}