Skip to main content

oak_go/lexer/
mod.rs

1use crate::{kind::GoSyntaxKind, language::GoLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
3
4type State<'a, S> = LexerState<'a, S, GoLanguage>;
5
6#[derive(Clone)]
7pub struct GoLexer<'config> {
8    _config: &'config GoLanguage,
9}
10
11impl<'config> Lexer<GoLanguage> for GoLexer<'config> {
12    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<GoLanguage>) -> LexOutput<GoLanguage> {
13        let mut state = State::new_with_cache(source, 0, cache);
14        let result = self.run(&mut state);
15        if result.is_ok() {
16            state.add_eof();
17        }
18        state.finish_with_cache(result, cache)
19    }
20}
21
22impl<'config> GoLexer<'config> {
23    pub fn new(config: &'config GoLanguage) -> Self {
24        Self { _config: config }
25    }
26
27    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
28        while state.not_at_end() {
29            let safe_point = state.get_position();
30
31            if self.skip_whitespace(state) {
32                continue;
33            }
34
35            if self.skip_comment(state) {
36                continue;
37            }
38
39            if self.lex_identifier_or_keyword(state) {
40                continue;
41            }
42
43            if self.lex_literal(state) {
44                continue;
45            }
46
47            if self.lex_operator_or_delimiter(state) {
48                continue;
49            }
50
51            // Fallback
52            let start_pos = state.get_position();
53            if let Some(ch) = state.peek() {
54                state.advance(ch.len_utf8());
55                state.add_token(GoSyntaxKind::Error, start_pos, state.get_position());
56            }
57
58            state.advance_if_dead_lock(safe_point);
59        }
60
61        Ok(())
62    }
63
64    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
65        let start = state.get_position();
66        while let Some(ch) = state.peek() {
67            if ch.is_whitespace() {
68                state.advance(ch.len_utf8());
69            }
70            else {
71                break;
72            }
73        }
74        if state.get_position() > start {
75            state.add_token(GoSyntaxKind::Whitespace, start, state.get_position());
76            return true;
77        }
78        false
79    }
80
81    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
82        let start = state.get_position();
83        if state.consume_if_starts_with("//") {
84            while let Some(ch) = state.peek() {
85                if ch == '\n' {
86                    break;
87                }
88                state.advance(ch.len_utf8());
89            }
90            state.add_token(GoSyntaxKind::Comment, start, state.get_position());
91            return true;
92        }
93        if state.consume_if_starts_with("/*") {
94            while let Some(ch) = state.peek() {
95                if state.consume_if_starts_with("*/") {
96                    break;
97                }
98                state.advance(ch.len_utf8());
99            }
100            state.add_token(GoSyntaxKind::Comment, start, state.get_position());
101            return true;
102        }
103        false
104    }
105
106    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
107        let start = state.get_position();
108        if let Some(ch) = state.peek() {
109            if ch.is_alphabetic() || ch == '_' {
110                state.advance(ch.len_utf8());
111                while let Some(ch) = state.peek() {
112                    if ch.is_alphanumeric() || ch == '_' {
113                        state.advance(ch.len_utf8());
114                    }
115                    else {
116                        break;
117                    }
118                }
119
120                let text = state.get_text_in(oak_core::Range { start, end: state.get_position() });
121                let kind = match text.as_ref() {
122                    "package" => GoSyntaxKind::Package,
123                    "import" => GoSyntaxKind::Import,
124                    "func" => GoSyntaxKind::Func,
125                    "var" => GoSyntaxKind::Var,
126                    "const" => GoSyntaxKind::Const,
127                    "type" => GoSyntaxKind::Type,
128                    "struct" => GoSyntaxKind::Struct,
129                    "interface" => GoSyntaxKind::Interface,
130                    "map" => GoSyntaxKind::Map,
131                    "chan" => GoSyntaxKind::Chan,
132                    "if" => GoSyntaxKind::If,
133                    "else" => GoSyntaxKind::Else,
134                    "for" => GoSyntaxKind::For,
135                    "range" => GoSyntaxKind::Range,
136                    "return" => GoSyntaxKind::Return,
137                    "break" => GoSyntaxKind::Break,
138                    "continue" => GoSyntaxKind::Continue,
139                    "goto" => GoSyntaxKind::Goto,
140                    "switch" => GoSyntaxKind::Switch,
141                    "case" => GoSyntaxKind::Case,
142                    "default" => GoSyntaxKind::Default,
143                    "defer" => GoSyntaxKind::Defer,
144                    "go" => GoSyntaxKind::Go,
145                    "select" => GoSyntaxKind::Select,
146                    "fallthrough" => GoSyntaxKind::Fallthrough,
147                    "true" => GoSyntaxKind::BoolLiteral,
148                    "false" => GoSyntaxKind::BoolLiteral,
149                    "nil" => GoSyntaxKind::NilLiteral,
150                    _ => GoSyntaxKind::Identifier,
151                };
152
153                state.add_token(kind, start, state.get_position());
154                return true;
155            }
156        }
157        false
158    }
159
160    fn lex_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
161        let start = state.get_position();
162        if let Some(ch) = state.peek() {
163            // String literal
164            if ch == '"' {
165                state.advance(ch.len_utf8());
166                while let Some(ch) = state.peek() {
167                    if ch == '"' {
168                        state.advance(ch.len_utf8());
169                        break;
170                    }
171                    if ch == '\\' {
172                        state.advance(ch.len_utf8());
173                        if let Some(next) = state.peek() {
174                            state.advance(next.len_utf8());
175                        }
176                    }
177                    else {
178                        state.advance(ch.len_utf8());
179                    }
180                }
181                state.add_token(GoSyntaxKind::StringLiteral, start, state.get_position());
182                return true;
183            }
184            // Raw string literal
185            if ch == '`' {
186                state.advance(ch.len_utf8());
187                while let Some(ch) = state.peek() {
188                    if ch == '`' {
189                        state.advance(ch.len_utf8());
190                        break;
191                    }
192                    state.advance(ch.len_utf8());
193                }
194                state.add_token(GoSyntaxKind::StringLiteral, start, state.get_position());
195                return true;
196            }
197            // Rune literal
198            if ch == '\'' {
199                state.advance(ch.len_utf8());
200                while let Some(ch) = state.peek() {
201                    if ch == '\'' {
202                        state.advance(ch.len_utf8());
203                        break;
204                    }
205                    if ch == '\\' {
206                        state.advance(ch.len_utf8());
207                        if let Some(next) = state.peek() {
208                            state.advance(next.len_utf8());
209                        }
210                    }
211                    else {
212                        state.advance(ch.len_utf8());
213                    }
214                }
215                state.add_token(GoSyntaxKind::RuneLiteral, start, state.get_position());
216                return true;
217            }
218            // Number literal
219            if ch.is_ascii_digit() {
220                state.advance(ch.len_utf8());
221                let mut has_dot = false;
222                while let Some(ch) = state.peek() {
223                    if ch.is_ascii_digit() {
224                        state.advance(ch.len_utf8());
225                    }
226                    else if ch == '.' && !has_dot {
227                        has_dot = true;
228                        state.advance(ch.len_utf8());
229                    }
230                    else {
231                        break;
232                    }
233                }
234                let kind = if has_dot { GoSyntaxKind::FloatLiteral } else { GoSyntaxKind::IntLiteral };
235                state.add_token(kind, start, state.get_position());
236                return true;
237            }
238        }
239        false
240    }
241
242    fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
243        let start = state.get_position();
244        let kind = if state.consume_if_starts_with(":=") {
245            GoSyntaxKind::ColonAssign
246        }
247        else if state.consume_if_starts_with("...") {
248            GoSyntaxKind::Ellipsis
249        }
250        else if state.consume_if_starts_with("<<=") {
251            GoSyntaxKind::LeftShiftAssign
252        }
253        else if state.consume_if_starts_with(">>=") {
254            GoSyntaxKind::RightShiftAssign
255        }
256        else if state.consume_if_starts_with("&^=") {
257            GoSyntaxKind::AmpersandCaretAssign
258        }
259        else if state.consume_if_starts_with("==") {
260            GoSyntaxKind::Equal
261        }
262        else if state.consume_if_starts_with("!=") {
263            GoSyntaxKind::NotEqual
264        }
265        else if state.consume_if_starts_with("<=") {
266            GoSyntaxKind::LessEqual
267        }
268        else if state.consume_if_starts_with(">=") {
269            GoSyntaxKind::GreaterEqual
270        }
271        else if state.consume_if_starts_with("&&") {
272            GoSyntaxKind::LogicalAnd
273        }
274        else if state.consume_if_starts_with("||") {
275            GoSyntaxKind::LogicalOr
276        }
277        else if state.consume_if_starts_with("<<") {
278            GoSyntaxKind::LeftShift
279        }
280        else if state.consume_if_starts_with(">>") {
281            GoSyntaxKind::RightShift
282        }
283        else if state.consume_if_starts_with("&^") {
284            GoSyntaxKind::AmpersandCaret
285        }
286        else if state.consume_if_starts_with("++") {
287            GoSyntaxKind::Increment
288        }
289        else if state.consume_if_starts_with("--") {
290            GoSyntaxKind::Decrement
291        }
292        else if state.consume_if_starts_with("+=") {
293            GoSyntaxKind::PlusAssign
294        }
295        else if state.consume_if_starts_with("-=") {
296            GoSyntaxKind::MinusAssign
297        }
298        else if state.consume_if_starts_with("*=") {
299            GoSyntaxKind::StarAssign
300        }
301        else if state.consume_if_starts_with("/=") {
302            GoSyntaxKind::SlashAssign
303        }
304        else if state.consume_if_starts_with("%=") {
305            GoSyntaxKind::PercentAssign
306        }
307        else if state.consume_if_starts_with("&=") {
308            GoSyntaxKind::AmpersandAssign
309        }
310        else if state.consume_if_starts_with("|=") {
311            GoSyntaxKind::PipeAssign
312        }
313        else if state.consume_if_starts_with("^=") {
314            GoSyntaxKind::CaretAssign
315        }
316        else if state.consume_if_starts_with("<-") {
317            GoSyntaxKind::Arrow
318        }
319        else if state.consume_if_starts_with("{") {
320            GoSyntaxKind::LeftBrace
321        }
322        else if state.consume_if_starts_with("}") {
323            GoSyntaxKind::RightBrace
324        }
325        else if state.consume_if_starts_with("(") {
326            GoSyntaxKind::LeftParen
327        }
328        else if state.consume_if_starts_with(")") {
329            GoSyntaxKind::RightParen
330        }
331        else if state.consume_if_starts_with("[") {
332            GoSyntaxKind::LeftBracket
333        }
334        else if state.consume_if_starts_with("]") {
335            GoSyntaxKind::RightBracket
336        }
337        else if state.consume_if_starts_with(".") {
338            GoSyntaxKind::Dot
339        }
340        else if state.consume_if_starts_with(",") {
341            GoSyntaxKind::Comma
342        }
343        else if state.consume_if_starts_with(";") {
344            GoSyntaxKind::Semicolon
345        }
346        else if state.consume_if_starts_with(":") {
347            GoSyntaxKind::Colon
348        }
349        else if state.consume_if_starts_with("+") {
350            GoSyntaxKind::Plus
351        }
352        else if state.consume_if_starts_with("-") {
353            GoSyntaxKind::Minus
354        }
355        else if state.consume_if_starts_with("*") {
356            GoSyntaxKind::Star
357        }
358        else if state.consume_if_starts_with("/") {
359            GoSyntaxKind::Slash
360        }
361        else if state.consume_if_starts_with("%") {
362            GoSyntaxKind::Percent
363        }
364        else if state.consume_if_starts_with("&") {
365            GoSyntaxKind::Ampersand
366        }
367        else if state.consume_if_starts_with("|") {
368            GoSyntaxKind::Pipe
369        }
370        else if state.consume_if_starts_with("^") {
371            GoSyntaxKind::Caret
372        }
373        else if state.consume_if_starts_with("!") {
374            GoSyntaxKind::LogicalNot
375        }
376        else if state.consume_if_starts_with("<") {
377            GoSyntaxKind::Less
378        }
379        else if state.consume_if_starts_with(">") {
380            GoSyntaxKind::Greater
381        }
382        else if state.consume_if_starts_with("=") {
383            GoSyntaxKind::Assign
384        }
385        else {
386            return false;
387        };
388
389        state.add_token(kind, start, state.get_position());
390        true
391    }
392}