Skip to main content

oak_vala/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2use oak_core::Source;
3/// Vala token types.
4pub mod token_type;
5
6use crate::{language::ValaLanguage, lexer::token_type::ValaTokenType};
7use oak_core::{
8    Lexer, LexerCache, LexerState, OakError,
9    lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
10};
11use std::sync::LazyLock;
12
13pub(crate) type State<'a, S> = LexerState<'a, S, ValaLanguage>;
14
15static VALA_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
16static VALA_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: true });
17static VALA_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
18static VALA_CHAR: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
19
20/// Lexer for the Vala language.
21#[derive(Clone, Debug)]
22pub struct ValaLexer<'config> {
23    config: &'config ValaLanguage,
24}
25
26impl<'config> Lexer<ValaLanguage> for ValaLexer<'config> {
27    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<ValaLanguage>) -> LexOutput<ValaLanguage> {
28        let mut state: State<'_, S> = LexerState::new(source);
29        let result = self.run(&mut state);
30        state.finish_with_cache(result, cache)
31    }
32}
33
34impl<'config> ValaLexer<'config> {
35    /// Creates a new `ValaLexer` with the given language configuration.
36    pub fn new(config: &'config ValaLanguage) -> Self {
37        Self { config }
38    }
39
40    fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
41        while state.not_at_end() {
42            let safe_point = state.get_position();
43
44            if self.skip_whitespace(state) {
45                continue;
46            }
47
48            if self.skip_comment(state) {
49                continue;
50            }
51
52            if self.lex_string_literal(state) {
53                continue;
54            }
55
56            if self.lex_char_literal(state) {
57                continue;
58            }
59
60            if self.lex_number_literal(state) {
61                continue;
62            }
63
64            if self.lex_identifier_or_keyword(state) {
65                continue;
66            }
67
68            if self.lex_operators(state) {
69                continue;
70            }
71
72            if self.lex_single_char_tokens(state) {
73                continue;
74            }
75
76            state.advance_if_dead_lock(safe_point);
77        }
78
79        // Add EOF token
80        state.add_eof();
81        Ok(())
82    }
83
84    fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
85        VALA_WHITESPACE.scan(state, ValaTokenType::Whitespace)
86    }
87
88    fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
89        VALA_COMMENT.scan(state, ValaTokenType::LineComment, ValaTokenType::BlockComment)
90    }
91
92    fn lex_string_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
93        VALA_STRING.scan(state, ValaTokenType::StringLiteral)
94    }
95
96    fn lex_char_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
97        VALA_CHAR.scan(state, ValaTokenType::CharLiteral)
98    }
99
100    fn lex_number_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
101        let start = state.get_position();
102        let first = match state.peek() {
103            Some(c) => c,
104            None => return false,
105        };
106
107        if !first.is_ascii_digit() {
108            return false;
109        }
110
111        let mut is_float = false;
112
113        // Handle hex, octal, binary
114        if first == '0' {
115            match state.peek_next_n(1) {
116                Some('x') | Some('X') => {
117                    state.advance(2);
118                    while let Some(c) = state.peek() {
119                        if c.is_ascii_hexdigit() || c == '_' {
120                            state.advance(1);
121                        }
122                        else {
123                            break;
124                        }
125                    }
126                }
127                Some('b') | Some('B') => {
128                    state.advance(2);
129                    while let Some(c) = state.peek() {
130                        if c == '0' || c == '1' || c == '_' {
131                            state.advance(1);
132                        }
133                        else {
134                            break;
135                        }
136                    }
137                }
138                Some('o') | Some('O') => {
139                    state.advance(2);
140                    while let Some(c) = state.peek() {
141                        if ('0'..='7').contains(&c) || c == '_' {
142                            state.advance(1);
143                        }
144                        else {
145                            break;
146                        }
147                    }
148                }
149                _ => {
150                    state.advance(1);
151                    while let Some(c) = state.peek() {
152                        if c.is_ascii_digit() || c == '_' {
153                            state.advance(1);
154                        }
155                        else {
156                            break;
157                        }
158                    }
159                }
160            }
161        }
162        else {
163            state.advance(1);
164            while let Some(c) = state.peek() {
165                if c.is_ascii_digit() || c == '_' {
166                    state.advance(1);
167                }
168                else {
169                    break;
170                }
171            }
172        }
173
174        // Fractional part
175        if state.peek() == Some('.') {
176            let n1 = state.peek_next_n(1);
177            if n1.map(|c| c.is_ascii_digit()).unwrap_or(false) {
178                is_float = true;
179                state.advance(1); // consume '.'
180                while let Some(c) = state.peek() {
181                    if c.is_ascii_digit() || c == '_' {
182                        state.advance(1);
183                    }
184                    else {
185                        break;
186                    }
187                }
188            }
189        }
190
191        // Exponent part
192        if let Some(c) = state.peek() {
193            if c == 'e' || c == 'E' {
194                let n1 = state.peek_next_n(1);
195                if n1 == Some('+') || n1 == Some('-') || n1.map(|d| d.is_ascii_digit()).unwrap_or(false) {
196                    is_float = true;
197                    state.advance(1);
198                    if let Some(sign) = state.peek() {
199                        if sign == '+' || sign == '-' {
200                            state.advance(1);
201                        }
202                    }
203                    while let Some(d) = state.peek() {
204                        if d.is_ascii_digit() || d == '_' {
205                            state.advance(1);
206                        }
207                        else {
208                            break;
209                        }
210                    }
211                }
212            }
213        }
214
215        // Suffix letters (e.g., f, d, l)
216        while let Some(c) = state.peek() {
217            if c.is_ascii_alphabetic() {
218                state.advance(1);
219            }
220            else {
221                break;
222            }
223        }
224
225        let end = state.get_position();
226        state.add_token(if is_float { ValaTokenType::FloatLiteral } else { ValaTokenType::IntegerLiteral }, start, end);
227        true
228    }
229
230    fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
231        let start = state.get_position();
232        let ch = match state.peek() {
233            Some(c) => c,
234            None => return false,
235        };
236
237        if !(ch.is_ascii_alphabetic() || ch == '_') {
238            return false;
239        }
240
241        state.advance(ch.len_utf8());
242        while let Some(c) = state.peek() {
243            if c.is_ascii_alphanumeric() || c == '_' {
244                state.advance(c.len_utf8());
245            }
246            else {
247                break;
248            }
249        }
250
251        let end = state.get_position();
252        let text = state.get_text_in(oak_core::Range { start, end });
253        let kind = match text.as_ref() {
254            "abstract" => ValaTokenType::AbstractKw,
255            "as" => ValaTokenType::AsKw,
256            "base" => ValaTokenType::BaseKw,
257            "break" => ValaTokenType::BreakKw,
258            "case" => ValaTokenType::CaseKw,
259            "catch" => ValaTokenType::CatchKw,
260            "class" => ValaTokenType::ClassKw,
261            "const" => ValaTokenType::ConstKw,
262            "construct" => ValaTokenType::ConstructKw,
263            "continue" => ValaTokenType::ContinueKw,
264            "default" => ValaTokenType::DefaultKw,
265            "delegate" => ValaTokenType::DelegateKw,
266            "delete" => ValaTokenType::DeleteKw,
267            "do" => ValaTokenType::DoKw,
268            "else" => ValaTokenType::ElseKw,
269            "enum" => ValaTokenType::EnumKw,
270            "ensures" => ValaTokenType::EnsuresKw,
271            "errordomain" => ValaTokenType::ErrordomainKw,
272            "extern" => ValaTokenType::ExternKw,
273            "false" => ValaTokenType::FalseKw,
274            "finally" => ValaTokenType::FinallyKw,
275            "for" => ValaTokenType::ForKw,
276            "foreach" => ValaTokenType::ForeachKw,
277            "get" => ValaTokenType::GetKw,
278            "if" => ValaTokenType::IfKw,
279            "in" => ValaTokenType::InKw,
280            "inline" => ValaTokenType::InlineKw,
281            "interface" => ValaTokenType::InterfaceKw,
282            "internal" => ValaTokenType::InternalKw,
283            "is" => ValaTokenType::IsKw,
284            "lock" => ValaTokenType::LockKw,
285            "namespace" => ValaTokenType::NamespaceKw,
286            "new" => ValaTokenType::NewKw,
287            "null" => ValaTokenType::NullKw,
288            "out" => ValaTokenType::OutKw,
289            "override" => ValaTokenType::OverrideKw,
290            "owned" => ValaTokenType::OwnedKw,
291            "private" => ValaTokenType::PrivateKw,
292            "protected" => ValaTokenType::ProtectedKw,
293            "public" => ValaTokenType::PublicKw,
294            "ref" => ValaTokenType::RefKw,
295            "requires" => ValaTokenType::RequiresKw,
296            "return" => ValaTokenType::ReturnKw,
297            "set" => ValaTokenType::SetKw,
298            "sizeof" => ValaTokenType::SizeofKw,
299            "static" => ValaTokenType::StaticKw,
300            "struct" => ValaTokenType::StructKw,
301            "switch" => ValaTokenType::SwitchKw,
302            "this" => ValaTokenType::ThisKw,
303            "throw" => ValaTokenType::ThrowKw,
304            "throws" => ValaTokenType::ThrowsKw,
305            "true" => ValaTokenType::TrueKw,
306            "try" => ValaTokenType::TryKw,
307            "typeof" => ValaTokenType::TypeofKw,
308            "unowned" => ValaTokenType::UnownedKw,
309            "using" => ValaTokenType::UsingKw,
310            "var" => ValaTokenType::VarKw,
311            "virtual" => ValaTokenType::VirtualKw,
312            "void" => ValaTokenType::VoidKw,
313            "volatile" => ValaTokenType::VolatileKw,
314            "weak" => ValaTokenType::WeakKw,
315            "while" => ValaTokenType::WhileKw,
316            "yield" => ValaTokenType::YieldKw,
317            // Basic types
318            "bool" => ValaTokenType::BoolKw,
319            "char" => ValaTokenType::CharKw,
320            "uchar" => ValaTokenType::UcharKw,
321            "int" => ValaTokenType::IntKw,
322            "uint" => ValaTokenType::UintKw,
323            "short" => ValaTokenType::ShortKw,
324            "ushort" => ValaTokenType::UshortKw,
325            "long" => ValaTokenType::LongKw,
326            "ulong" => ValaTokenType::UlongKw,
327            "int8" => ValaTokenType::Int8Kw,
328            "uint8" => ValaTokenType::Uint8Kw,
329            "int16" => ValaTokenType::Int16Kw,
330            "uint16" => ValaTokenType::Uint16Kw,
331            "int32" => ValaTokenType::Int32Kw,
332            "uint32" => ValaTokenType::Uint32Kw,
333            "int64" => ValaTokenType::Int64Kw,
334            "uint64" => ValaTokenType::Uint64Kw,
335            "float" => ValaTokenType::FloatKw,
336            "double" => ValaTokenType::DoubleKw,
337            "string" => ValaTokenType::StringKw,
338            _ => ValaTokenType::Identifier,
339        };
340
341        state.add_token(kind, start, state.get_position());
342        true
343    }
344
345    fn lex_operators<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
346        let start = state.get_position();
347
348        // Prefer longer operators
349        let patterns: &[(&str, ValaTokenType)] = &[
350            ("<<", ValaTokenType::LeftShift),
351            (">>", ValaTokenType::RightShift),
352            ("==", ValaTokenType::EqEq),
353            ("!=", ValaTokenType::NotEq),
354            ("<=", ValaTokenType::LessEq),
355            (">=", ValaTokenType::GreaterEq),
356            ("&&", ValaTokenType::AndAnd),
357            ("||", ValaTokenType::OrOr),
358            ("++", ValaTokenType::PlusPlus),
359            ("--", ValaTokenType::MinusMinus),
360            ("+=", ValaTokenType::PlusEq),
361            ("-=", ValaTokenType::MinusEq),
362            ("*=", ValaTokenType::StarEq),
363            ("/=", ValaTokenType::SlashEq),
364            ("%=", ValaTokenType::PercentEq),
365            ("->", ValaTokenType::Arrow),
366        ];
367
368        for (pat, kind) in patterns {
369            if state.starts_with(pat) {
370                state.advance(pat.len());
371                state.add_token(*kind, start, state.get_position());
372                return true;
373            }
374        }
375
376        if let Some(ch) = state.current() {
377            let kind = match ch {
378                '+' => Some(ValaTokenType::Plus),
379                '-' => Some(ValaTokenType::Minus),
380                '*' => Some(ValaTokenType::Star),
381                '/' => Some(ValaTokenType::Slash),
382                '%' => Some(ValaTokenType::Percent),
383                '^' => Some(ValaTokenType::Caret),
384                '!' => Some(ValaTokenType::Bang),
385                '&' => Some(ValaTokenType::Ampersand),
386                '|' => Some(ValaTokenType::Pipe),
387                '=' => Some(ValaTokenType::Eq),
388                '>' => Some(ValaTokenType::GreaterThan),
389                '<' => Some(ValaTokenType::LessThan),
390                '.' => Some(ValaTokenType::Dot),
391                ':' => Some(ValaTokenType::Colon),
392                '?' => Some(ValaTokenType::Question),
393                '~' => Some(ValaTokenType::Tilde),
394                '\\' => Some(ValaTokenType::Backslash),
395                '@' => Some(ValaTokenType::At),
396                '#' => Some(ValaTokenType::Hash),
397                '$' => Some(ValaTokenType::Dollar),
398                _ => None,
399            };
400
401            if let Some(k) = kind {
402                state.advance(ch.len_utf8());
403                state.add_token(k, start, state.get_position());
404                return true;
405            }
406        }
407
408        false
409    }
410
411    fn lex_single_char_tokens<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
412        let start = state.get_position();
413        if let Some(ch) = state.current() {
414            let kind = match ch {
415                '(' => Some(ValaTokenType::LeftParen),
416                ')' => Some(ValaTokenType::RightParen),
417                '{' => Some(ValaTokenType::LeftBrace),
418                '}' => Some(ValaTokenType::RightBrace),
419                '[' => Some(ValaTokenType::LeftBracket),
420                ']' => Some(ValaTokenType::RightBracket),
421                ',' => Some(ValaTokenType::Comma),
422                ';' => Some(ValaTokenType::Semicolon),
423                _ => None,
424            };
425
426            if let Some(k) = kind {
427                state.advance(ch.len_utf8());
428                state.add_token(k, start, state.get_position());
429                return true;
430            }
431        }
432        false
433    }
434}