Skip to main content

oak_go/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2/// Token type definitions for the Go language.
3pub mod token_type;
4
5use crate::{language::GoLanguage, lexer::token_type::GoTokenType};
6use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
7
/// Lexer state type alias for the Go language.
///
/// Pins the language parameter of [`LexerState`] to [`GoLanguage`] while
/// leaving the borrow lifetime `'a` and the source type `S` generic, so the
/// lexer methods in this module can simply write `State<'a, S>`.
pub(crate) type State<'a, S> = LexerState<'a, S, GoLanguage>;
10
/// Go language lexer that tokenizes Go source code.
///
/// The lexer holds only a shared reference to the language configuration;
/// the scanning state for a run lives in the `State` value that is passed
/// to each lexing method.
#[derive(Clone)]
pub struct GoLexer<'config> {
    /// The language configuration reference.
    ///
    /// NOTE(review): no method visible in this file reads this field — confirm
    /// whether it is used elsewhere or reserved for future options.
    config: &'config GoLanguage,
}
17
18impl<'config> Lexer<GoLanguage> for GoLexer<'config> {
19    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<GoLanguage>) -> LexOutput<GoLanguage> {
20        let mut state = State::new_with_cache(source, 0, cache);
21        let result = self.run(&mut state);
22        if result.is_ok() {
23            state.add_eof()
24        }
25        state.finish_with_cache(result, cache)
26    }
27}
28
29impl<'config> GoLexer<'config> {
30    /// Creates a new Go lexer with the given language configuration.
31    pub fn new(config: &'config GoLanguage) -> Self {
32        Self { config }
33    }
34
35    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
36        while state.not_at_end() {
37            let safe_point = state.get_position();
38
39            if self.skip_whitespace(state) {
40                continue;
41            }
42
43            if self.skip_comment(state) {
44                continue;
45            }
46
47            if self.lex_identifier_or_keyword(state) {
48                continue;
49            }
50
51            if self.lex_literal(state) {
52                continue;
53            }
54
55            if self.lex_operator_or_delimiter(state) {
56                continue;
57            }
58
59            // Fallback
60            let start_pos = state.get_position();
61            if let Some(ch) = state.peek() {
62                state.advance(ch.len_utf8());
63                state.add_token(GoTokenType::Error, start_pos, state.get_position());
64            }
65
66            state.advance_if_dead_lock(safe_point)
67        }
68
69        Ok(())
70    }
71
72    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
73        let start = state.get_position();
74        while let Some(ch) = state.peek() {
75            if ch.is_whitespace() {
76                state.advance(ch.len_utf8());
77            }
78            else {
79                break;
80            }
81        }
82        if state.get_position() > start {
83            state.add_token(GoTokenType::Whitespace, start, state.get_position());
84            return true;
85        }
86        false
87    }
88
89    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
90        let start = state.get_position();
91        if state.consume_if_starts_with("//") {
92            while let Some(ch) = state.peek() {
93                if ch == '\n' {
94                    break;
95                }
96                state.advance(ch.len_utf8());
97            }
98            state.add_token(GoTokenType::Comment, start, state.get_position());
99            return true;
100        }
101        if state.consume_if_starts_with("/*") {
102            while let Some(ch) = state.peek() {
103                if state.consume_if_starts_with("*/") {
104                    break;
105                }
106                state.advance(ch.len_utf8());
107            }
108            state.add_token(GoTokenType::Comment, start, state.get_position());
109            return true;
110        }
111        false
112    }
113
114    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
115        let start = state.get_position();
116        if let Some(ch) = state.peek() {
117            if ch.is_alphabetic() || ch == '_' {
118                state.advance(ch.len_utf8());
119                while let Some(ch) = state.peek() {
120                    if ch.is_alphanumeric() || ch == '_' {
121                        state.advance(ch.len_utf8());
122                    }
123                    else {
124                        break;
125                    }
126                }
127
128                let text = state.get_text_in(oak_core::Range { start, end: state.get_position() });
129                let kind = match text.as_ref() {
130                    "package" => GoTokenType::Package,
131                    "import" => GoTokenType::Import,
132                    "func" => GoTokenType::Func,
133                    "var" => GoTokenType::Var,
134                    "const" => GoTokenType::Const,
135                    "type" => GoTokenType::Type,
136                    "struct" => GoTokenType::Struct,
137                    "interface" => GoTokenType::Interface,
138                    "map" => GoTokenType::Map,
139                    "chan" => GoTokenType::Chan,
140                    "if" => GoTokenType::If,
141                    "else" => GoTokenType::Else,
142                    "for" => GoTokenType::For,
143                    "range" => GoTokenType::Range,
144                    "return" => GoTokenType::Return,
145                    "break" => GoTokenType::Break,
146                    "continue" => GoTokenType::Continue,
147                    "goto" => GoTokenType::Goto,
148                    "switch" => GoTokenType::Switch,
149                    "case" => GoTokenType::Case,
150                    "default" => GoTokenType::Default,
151                    "defer" => GoTokenType::Defer,
152                    "go" => GoTokenType::Go,
153                    "select" => GoTokenType::Select,
154                    "fallthrough" => GoTokenType::Fallthrough,
155                    "true" => GoTokenType::BoolLiteral,
156                    "false" => GoTokenType::BoolLiteral,
157                    "nil" => GoTokenType::NilLiteral,
158                    _ => GoTokenType::Identifier,
159                };
160                state.add_token(kind, start, state.get_position());
161                return true;
162            }
163        }
164        false
165    }
166
167    fn lex_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
168        let start = state.get_position();
169        if let Some(ch) = state.peek() {
170            // String literal
171            if ch == '"' {
172                state.advance(ch.len_utf8());
173                while let Some(ch) = state.peek() {
174                    if ch == '"' {
175                        state.advance(ch.len_utf8());
176                        break;
177                    }
178                    if ch == '\\' {
179                        state.advance(ch.len_utf8());
180                        if let Some(next) = state.peek() {
181                            state.advance(next.len_utf8());
182                        }
183                    }
184                    else {
185                        state.advance(ch.len_utf8());
186                    }
187                }
188                state.add_token(GoTokenType::StringLiteral, start, state.get_position());
189                return true;
190            }
191            // Raw string literal
192            if ch == '`' {
193                state.advance(ch.len_utf8());
194                while let Some(ch) = state.peek() {
195                    if ch == '`' {
196                        state.advance(ch.len_utf8());
197                        break;
198                    }
199                    state.advance(ch.len_utf8());
200                }
201                state.add_token(GoTokenType::StringLiteral, start, state.get_position());
202                return true;
203            }
204            // Rune literal
205            if ch == '\'' {
206                state.advance(ch.len_utf8());
207                while let Some(ch) = state.peek() {
208                    if ch == '\'' {
209                        state.advance(ch.len_utf8());
210                        break;
211                    }
212                    if ch == '\\' {
213                        state.advance(ch.len_utf8());
214                        if let Some(next) = state.peek() {
215                            state.advance(next.len_utf8());
216                        }
217                    }
218                    else {
219                        state.advance(ch.len_utf8());
220                    }
221                }
222                state.add_token(GoTokenType::RuneLiteral, start, state.get_position());
223                return true;
224            }
225            // Number literal
226            if ch.is_ascii_digit() {
227                state.advance(ch.len_utf8());
228                let mut has_dot = false;
229                while let Some(ch) = state.peek() {
230                    if ch.is_ascii_digit() {
231                        state.advance(ch.len_utf8());
232                    }
233                    else if ch == '.' && !has_dot {
234                        has_dot = true;
235                        state.advance(ch.len_utf8());
236                    }
237                    else {
238                        break;
239                    }
240                }
241                let kind = if has_dot { GoTokenType::FloatLiteral } else { GoTokenType::IntLiteral };
242                state.add_token(kind, start, state.get_position());
243                return true;
244            }
245        }
246        false
247    }
248
249    fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
250        let start = state.get_position();
251        let kind = if state.consume_if_starts_with(":=") {
252            GoTokenType::ColonAssign
253        }
254        else if state.consume_if_starts_with("...") {
255            GoTokenType::Ellipsis
256        }
257        else if state.consume_if_starts_with("<<=") {
258            GoTokenType::LeftShiftAssign
259        }
260        else if state.consume_if_starts_with(">>=") {
261            GoTokenType::RightShiftAssign
262        }
263        else if state.consume_if_starts_with("&^=") {
264            GoTokenType::AmpersandCaretAssign
265        }
266        else if state.consume_if_starts_with("==") {
267            GoTokenType::Equal
268        }
269        else if state.consume_if_starts_with("!=") {
270            GoTokenType::NotEqual
271        }
272        else if state.consume_if_starts_with("<=") {
273            GoTokenType::LessEqual
274        }
275        else if state.consume_if_starts_with(">=") {
276            GoTokenType::GreaterEqual
277        }
278        else if state.consume_if_starts_with("&&") {
279            GoTokenType::LogicalAnd
280        }
281        else if state.consume_if_starts_with("||") {
282            GoTokenType::LogicalOr
283        }
284        else if state.consume_if_starts_with("<<") {
285            GoTokenType::LeftShift
286        }
287        else if state.consume_if_starts_with(">>") {
288            GoTokenType::RightShift
289        }
290        else if state.consume_if_starts_with("&^") {
291            GoTokenType::AmpersandCaret
292        }
293        else if state.consume_if_starts_with("++") {
294            GoTokenType::Increment
295        }
296        else if state.consume_if_starts_with("--") {
297            GoTokenType::Decrement
298        }
299        else if state.consume_if_starts_with("+=") {
300            GoTokenType::PlusAssign
301        }
302        else if state.consume_if_starts_with("-=") {
303            GoTokenType::MinusAssign
304        }
305        else if state.consume_if_starts_with("*=") {
306            GoTokenType::StarAssign
307        }
308        else if state.consume_if_starts_with("/=") {
309            GoTokenType::SlashAssign
310        }
311        else if state.consume_if_starts_with("%=") {
312            GoTokenType::PercentAssign
313        }
314        else if state.consume_if_starts_with("&=") {
315            GoTokenType::AmpersandAssign
316        }
317        else if state.consume_if_starts_with("|=") {
318            GoTokenType::PipeAssign
319        }
320        else if state.consume_if_starts_with("^=") {
321            GoTokenType::CaretAssign
322        }
323        else if state.consume_if_starts_with("<-") {
324            GoTokenType::Arrow
325        }
326        else if state.consume_if_starts_with("{") {
327            GoTokenType::LeftBrace
328        }
329        else if state.consume_if_starts_with("}") {
330            GoTokenType::RightBrace
331        }
332        else if state.consume_if_starts_with("(") {
333            GoTokenType::LeftParen
334        }
335        else if state.consume_if_starts_with(")") {
336            GoTokenType::RightParen
337        }
338        else if state.consume_if_starts_with("[") {
339            GoTokenType::LeftBracket
340        }
341        else if state.consume_if_starts_with("]") {
342            GoTokenType::RightBracket
343        }
344        else if state.consume_if_starts_with(".") {
345            GoTokenType::Dot
346        }
347        else if state.consume_if_starts_with(",") {
348            GoTokenType::Comma
349        }
350        else if state.consume_if_starts_with(";") {
351            GoTokenType::Semicolon
352        }
353        else if state.consume_if_starts_with(":") {
354            GoTokenType::Colon
355        }
356        else if state.consume_if_starts_with("+") {
357            GoTokenType::Plus
358        }
359        else if state.consume_if_starts_with("-") {
360            GoTokenType::Minus
361        }
362        else if state.consume_if_starts_with("*") {
363            GoTokenType::Star
364        }
365        else if state.consume_if_starts_with("/") {
366            GoTokenType::Slash
367        }
368        else if state.consume_if_starts_with("%") {
369            GoTokenType::Percent
370        }
371        else if state.consume_if_starts_with("&") {
372            GoTokenType::Ampersand
373        }
374        else if state.consume_if_starts_with("|") {
375            GoTokenType::Pipe
376        }
377        else if state.consume_if_starts_with("^") {
378            GoTokenType::Caret
379        }
380        else if state.consume_if_starts_with("!") {
381            GoTokenType::LogicalNot
382        }
383        else if state.consume_if_starts_with("<") {
384            GoTokenType::Less
385        }
386        else if state.consume_if_starts_with(">") {
387            GoTokenType::Greater
388        }
389        else if state.consume_if_starts_with("=") {
390            GoTokenType::Assign
391        }
392        else {
393            return false;
394        };
395
396        state.add_token(kind, start, state.get_position());
397        true
398    }
399}