Skip to main content

oak_gsgl/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2/// Token type definitions.
3pub mod token_type;
4
5use crate::{language::GsglLanguage, lexer::token_type::GsglTokenType};
6use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
7
8pub(crate) type State<'a, S> = LexerState<'a, S, GsglLanguage>;
9
10/// GSGL lexer.
11#[derive(Clone, Debug)]
12pub struct GsglLexer<'config> {
13    /// The language configuration.
14    pub config: &'config GsglLanguage,
15}
16
17impl<'config> GsglLexer<'config> {
18    /// Creates a new `GsglLexer`.
19    pub fn new(config: &'config GsglLanguage) -> Self {
20        Self { config }
21    }
22
23    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
24        while state.not_at_end() {
25            let start = state.get_position();
26
27            if self.skip_whitespace(state) {
28                continue;
29            }
30
31            if self.lex_newline(state) {
32                continue;
33            }
34
35            if self.lex_comment(state) {
36                continue;
37            }
38
39            if self.lex_string_literal(state) {
40                continue;
41            }
42
43            if self.lex_char_literal(state) {
44                continue;
45            }
46
47            if self.lex_number_literal(state) {
48                continue;
49            }
50
51            if self.lex_identifier_or_keyword(state) {
52                continue;
53            }
54
55            if self.lex_operator_or_delimiter(state) {
56                continue;
57            }
58
59            // If no method handles the current character, create an error token and advance
60            if let Some(ch) = state.peek() {
61                state.advance(ch.len_utf8());
62                state.add_token(GsglTokenType::Error, start, state.get_position());
63            }
64            else {
65                break;
66            }
67        }
68
69        Ok(())
70    }
71
72    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
73        let start = state.get_position();
74
75        while let Some(ch) = state.peek() {
76            if ch == ' ' || ch == '\t' {
77                state.advance(1);
78            }
79            else {
80                break;
81            }
82        }
83
84        if state.get_position() > start {
85            state.add_token(GsglTokenType::Whitespace, start, state.get_position());
86            true
87        }
88        else {
89            false
90        }
91    }
92
93    fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
94        let start = state.get_position();
95        if state.peek() == Some('\n') {
96            state.advance(1);
97            state.add_token(GsglTokenType::Newline, start, state.get_position());
98            true
99        }
100        else if state.peek() == Some('\r') && state.peek_next_n(1) == Some('\n') {
101            state.advance(2);
102            state.add_token(GsglTokenType::Newline, start, state.get_position());
103            true
104        }
105        else {
106            false
107        }
108    }
109
110    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
111        let start = state.get_position();
112
113        if state.peek() == Some('/') && state.peek_next_n(1) == Some('/') {
114            // Single-line comment
115            state.advance(2);
116            while let Some(ch) = state.peek() {
117                if ch == '\n' || ch == '\r' {
118                    break;
119                }
120                state.advance(1);
121            }
122            state.add_token(GsglTokenType::Comment, start, state.get_position());
123            true
124        }
125        else if state.peek() == Some('/') && state.peek_next_n(1) == Some('*') {
126            // Multi-line comment
127            state.advance(2);
128            while let Some(ch) = state.peek() {
129                if ch == '*' && state.peek_next_n(1) == Some('/') {
130                    state.advance(2);
131                    break;
132                }
133                state.advance(1);
134            }
135            state.add_token(GsglTokenType::Comment, start, state.get_position());
136            true
137        }
138        else {
139            false
140        }
141    }
142
143    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
144        let start = state.get_position();
145
146        if state.peek() == Some('"') {
147            state.advance(1); // Consume start quote
148
149            while let Some(ch) = state.peek() {
150                if ch == '"' {
151                    state.advance(1); // Consume end quote
152                    state.add_token(GsglTokenType::String, start, state.get_position());
153                    return true;
154                }
155                else if ch == '\\' {
156                    state.advance(1); // Consume escape character
157                    if state.peek().is_some() {
158                        state.advance(1); // Consume escaped character
159                    }
160                }
161                else {
162                    state.advance(1);
163                }
164            }
165
166            // Unterminated string
167            state.add_token(GsglTokenType::String, start, state.get_position());
168            true
169        }
170        else {
171            false
172        }
173    }
174
175    fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
176        let start = state.get_position();
177
178        if state.peek() == Some('\'') {
179            state.advance(1); // Consume start single quote
180
181            if let Some(ch) = state.peek() {
182                if ch == '\\' {
183                    state.advance(1); // Consume escape character
184                    if state.peek().is_some() {
185                        state.advance(1); // Consume escaped character
186                    }
187                }
188                else if ch != '\'' {
189                    state.advance(1); // Consume character
190                }
191            }
192
193            if state.peek() == Some('\'') {
194                state.advance(1); // Consume end single quote
195            }
196
197            state.add_token(GsglTokenType::String, start, state.get_position());
198            true
199        }
200        else {
201            false
202        }
203    }
204
205    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
206        let start = state.get_position();
207
208        if let Some(ch) = state.peek() {
209            if ch.is_ascii_digit() {
210                // Consume digits
211                while let Some(ch) = state.peek() {
212                    if ch.is_ascii_digit() {
213                        state.advance(1);
214                    }
215                    else {
216                        break;
217                    }
218                }
219
220                // Check for decimal point
221                if state.peek() == Some('.') && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit()) {
222                    state.advance(1); // Consume '.'
223                    while let Some(ch) = state.peek() {
224                        if ch.is_ascii_digit() {
225                            state.advance(1);
226                        }
227                        else {
228                            break;
229                        }
230                    }
231                }
232
233                // Check for scientific notation
234                if matches!(state.peek(), Some('e') | Some('E')) {
235                    state.advance(1);
236                    if matches!(state.peek(), Some('+') | Some('-')) {
237                        state.advance(1);
238                    }
239                    while let Some(ch) = state.peek() {
240                        if ch.is_ascii_digit() {
241                            state.advance(1);
242                        }
243                        else {
244                            break;
245                        }
246                    }
247                }
248
249                // Check for float suffix
250                if matches!(state.peek(), Some('f') | Some('F')) {
251                    state.advance(1);
252                }
253
254                state.add_token(GsglTokenType::Number, start, state.get_position());
255                return true;
256            }
257        }
258
259        false
260    }
261
262    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
263        let start = state.get_position();
264
265        if let Some(ch) = state.peek() {
266            if ch.is_ascii_alphabetic() || ch == '_' {
267                while let Some(ch) = state.peek() {
268                    if ch.is_ascii_alphanumeric() || ch == '_' {
269                        state.advance(1);
270                    }
271                    else {
272                        break;
273                    }
274                }
275
276                let end = state.get_position();
277                let text = state.get_text_in(oak_core::Range { start, end });
278                let kind = match text.as_ref() {
279                    "shader" => GsglTokenType::Shader,
280                    "vertex" => GsglTokenType::Vertex,
281                    "fragment" => GsglTokenType::Fragment,
282                    "geometry" => GsglTokenType::Geometry,
283                    "compute" => GsglTokenType::Compute,
284                    "uniform" => GsglTokenType::Uniform,
285                    "attribute" => GsglTokenType::Attribute,
286                    "varying" => GsglTokenType::Varying,
287                    "in" => GsglTokenType::In,
288                    "out" => GsglTokenType::Out,
289                    "inout" => GsglTokenType::Inout,
290                    "const" => GsglTokenType::Const,
291                    "struct" => GsglTokenType::Struct,
292                    "if" => GsglTokenType::If,
293                    "else" => GsglTokenType::Else,
294                    "for" => GsglTokenType::For,
295                    "while" => GsglTokenType::While,
296                    "do" => GsglTokenType::Do,
297                    "break" => GsglTokenType::Break,
298                    "continue" => GsglTokenType::Continue,
299                    "return" => GsglTokenType::Return,
300                    "discard" => GsglTokenType::Discard,
301                    "true" => GsglTokenType::True,
302                    "false" => GsglTokenType::False,
303                    "float" => GsglTokenType::Float,
304                    "int" => GsglTokenType::Int,
305                    "bool" => GsglTokenType::Bool,
306                    "vec2" => GsglTokenType::Vec2,
307                    "vec3" => GsglTokenType::Vec3,
308                    "vec4" => GsglTokenType::Vec4,
309                    "mat2" => GsglTokenType::Mat2,
310                    "mat3" => GsglTokenType::Mat3,
311                    "mat4" => GsglTokenType::Mat4,
312                    "sampler2D" => GsglTokenType::Sampler2D,
313                    "samplerCube" => GsglTokenType::SamplerCube,
314                    "void" => GsglTokenType::Void,
315                    _ => GsglTokenType::Identifier,
316                };
317
318                state.add_token(kind, start, state.get_position());
319                return true;
320            }
321        }
322
323        false
324    }
325
326    fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
327        let start = state.get_position();
328
329        // Check for two-character operators
330        if let (Some(ch1), Some(ch2)) = (state.peek(), state.peek_next_n(1)) {
331            let two_char = format!("{}{}", ch1, ch2);
332            let kind = match two_char.as_str() {
333                "+=" => Some(GsglTokenType::PlusAssign),
334                "-=" => Some(GsglTokenType::MinusAssign),
335                "*=" => Some(GsglTokenType::StarAssign),
336                "/=" => Some(GsglTokenType::SlashAssign),
337                "==" => Some(GsglTokenType::Eq),
338                "!=" => Some(GsglTokenType::Ne),
339                "<=" => Some(GsglTokenType::Le),
340                ">=" => Some(GsglTokenType::Ge),
341                "&&" => Some(GsglTokenType::And),
342                "||" => Some(GsglTokenType::Or),
343                "<<" => Some(GsglTokenType::LeftShift),
344                ">>" => Some(GsglTokenType::RightShift),
345                _ => None,
346            };
347
348            if let Some(kind) = kind {
349                state.advance(2);
350                state.add_token(kind, start, state.get_position());
351                return true;
352            }
353        }
354
355        // Single-character operators and delimiters
356        if let Some(ch) = state.peek() {
357            let kind = match ch {
358                '+' => Some(GsglTokenType::Plus),
359                '-' => Some(GsglTokenType::Minus),
360                '*' => Some(GsglTokenType::Star),
361                '/' => Some(GsglTokenType::Slash),
362                '%' => Some(GsglTokenType::Percent),
363                '=' => Some(GsglTokenType::Assign),
364                '!' => Some(GsglTokenType::Not),
365                '<' => Some(GsglTokenType::Lt),
366                '>' => Some(GsglTokenType::Gt),
367                '&' => Some(GsglTokenType::BitAnd),
368                '|' => Some(GsglTokenType::BitOr),
369                '^' => Some(GsglTokenType::BitXor),
370                '~' => Some(GsglTokenType::BitNot),
371                '?' => Some(GsglTokenType::Question),
372                ':' => Some(GsglTokenType::Colon),
373                '#' => Some(GsglTokenType::Hash),
374                ';' => Some(GsglTokenType::Semicolon),
375                ',' => Some(GsglTokenType::Comma),
376                '.' => Some(GsglTokenType::Dot),
377                '(' => Some(GsglTokenType::LeftParen),
378                ')' => Some(GsglTokenType::RightParen),
379                '[' => Some(GsglTokenType::LeftBracket),
380                ']' => Some(GsglTokenType::RightBracket),
381                '{' => Some(GsglTokenType::LeftBrace),
382                '}' => Some(GsglTokenType::RightBrace),
383                _ => None,
384            };
385
386            if let Some(kind) = kind {
387                state.advance(ch.len_utf8());
388                state.add_token(kind, start, state.get_position());
389                return true;
390            }
391        }
392
393        false
394    }
395}
396
397impl<'config> Lexer<GsglLanguage> for GsglLexer<'config> {
398    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<GsglLanguage>) -> LexOutput<GsglLanguage> {
399        let mut state = State::new(source);
400        let result = self.run(&mut state);
401        if result.is_ok() {
402            state.add_eof();
403        }
404        state.finish_with_cache(result, cache)
405    }
406}